Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(15)

Side by Side Diff: src/regexp/regexp-parser.cc

Issue 2050343002: [regexp] Experimental support for regexp named captures (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: Rebase Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/regexp/regexp-parser.h ('k') | test/cctest/test-regexp.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2016 the V8 project authors. All rights reserved. 1 // Copyright 2016 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/regexp/regexp-parser.h" 5 #include "src/regexp/regexp-parser.h"
6 6
7 #include "src/char-predicates-inl.h" 7 #include "src/char-predicates-inl.h"
8 #include "src/factory.h" 8 #include "src/factory.h"
9 #include "src/isolate.h" 9 #include "src/isolate.h"
10 #include "src/objects-inl.h" 10 #include "src/objects-inl.h"
11 #include "src/ostreams.h" 11 #include "src/ostreams.h"
12 #include "src/regexp/jsregexp.h" 12 #include "src/regexp/jsregexp.h"
13 #include "src/utils.h" 13 #include "src/utils.h"
14 14
15 #ifdef V8_I18N_SUPPORT 15 #ifdef V8_I18N_SUPPORT
16 #include "unicode/uset.h" 16 #include "unicode/uset.h"
17 #endif // V8_I18N_SUPPORT 17 #endif // V8_I18N_SUPPORT
18 18
19 namespace v8 { 19 namespace v8 {
20 namespace internal { 20 namespace internal {
21 21
22 RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error, 22 RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error,
23 JSRegExp::Flags flags, Isolate* isolate, Zone* zone) 23 JSRegExp::Flags flags, Isolate* isolate, Zone* zone)
24 : isolate_(isolate), 24 : isolate_(isolate),
25 zone_(zone), 25 zone_(zone),
26 error_(error), 26 error_(error),
27 captures_(NULL), 27 captures_(NULL),
28 named_captures_(NULL),
29 named_back_references_(NULL),
28 in_(in), 30 in_(in),
29 current_(kEndMarker), 31 current_(kEndMarker),
30 ignore_case_(flags & JSRegExp::kIgnoreCase), 32 ignore_case_(flags & JSRegExp::kIgnoreCase),
31 multiline_(flags & JSRegExp::kMultiline), 33 multiline_(flags & JSRegExp::kMultiline),
32 unicode_(flags & JSRegExp::kUnicode), 34 unicode_(flags & JSRegExp::kUnicode),
33 next_pos_(0), 35 next_pos_(0),
34 captures_started_(0), 36 captures_started_(0),
35 capture_count_(0), 37 capture_count_(0),
36 has_more_(true), 38 has_more_(true),
37 simple_(false), 39 simple_(false),
(...skipping 104 matching lines...) Expand 10 before | Expand all | Expand 10 after
142 144
143 #define CHECK_FAILED /**/); \ 145 #define CHECK_FAILED /**/); \
144 if (failed_) return NULL; \ 146 if (failed_) return NULL; \
145 ((void)0 147 ((void)0
146 148
147 149
148 // Pattern :: 150 // Pattern ::
149 // Disjunction 151 // Disjunction
// Parses the whole pattern: a single disjunction, followed by resolution of
// the named back references recorded during parsing. Each CHECK_FAILED use
// makes this function return NULL when failed_ is set after the call.
// Also sets simple_ when the parsed tree is one atom whose length equals the
// input length.
150 RegExpTree* RegExpParser::ParsePattern() { 152 RegExpTree* RegExpParser::ParsePattern() {
151 RegExpTree* result = ParseDisjunction(CHECK_FAILED); 153 RegExpTree* result = ParseDisjunction(CHECK_FAILED);
154 PatchNamedBackReferences(CHECK_FAILED);
152 DCHECK(!has_more()); 155 DCHECK(!has_more());
153 // If the result of parsing is a literal string atom, and it has the 156 // If the result of parsing is a literal string atom, and it has the
154 // same length as the input, then the atom is identical to the input. 157 // same length as the input, then the atom is identical to the input.
155 if (result->IsAtom() && result->AsAtom()->length() == in()->length()) { 158 if (result->IsAtom() && result->AsAtom()->length() == in()->length()) {
156 simple_ = true; 159 simple_ = true;
157 } 160 }
158 return result; 161 return result;
159 } 162 }
160 163
161 164
162 // Disjunction :: 165 // Disjunction ::
163 // Alternative 166 // Alternative
164 // Alternative | Disjunction 167 // Alternative | Disjunction
165 // Alternative :: 168 // Alternative ::
166 // [empty] 169 // [empty]
167 // Term Alternative 170 // Term Alternative
168 // Term :: 171 // Term ::
169 // Assertion 172 // Assertion
170 // Atom 173 // Atom
171 // Atom Quantifier 174 // Atom Quantifier
172 RegExpTree* RegExpParser::ParseDisjunction() { 175 RegExpTree* RegExpParser::ParseDisjunction() {
173 // Used to store current state while parsing subexpressions. 176 // Used to store current state while parsing subexpressions.
174 RegExpParserState initial_state(NULL, INITIAL, RegExpLookaround::LOOKAHEAD, 0, 177 RegExpParserState initial_state(NULL, INITIAL, RegExpLookaround::LOOKAHEAD, 0,
175 ignore_case(), unicode(), zone()); 178 nullptr, ignore_case(), unicode(), zone());
176 RegExpParserState* state = &initial_state; 179 RegExpParserState* state = &initial_state;
177 // Cache the builder in a local variable for quick access. 180 // Cache the builder in a local variable for quick access.
178 RegExpBuilder* builder = initial_state.builder(); 181 RegExpBuilder* builder = initial_state.builder();
179 while (true) { 182 while (true) {
180 switch (current()) { 183 switch (current()) {
181 case kEndMarker: 184 case kEndMarker:
182 if (state->IsSubexpression()) { 185 if (state->IsSubexpression()) {
183 // Inside a parenthesized group when hitting end of input. 186 // Inside a parenthesized group when hitting end of input.
184 return ReportError(CStrVector("Unterminated group")); 187 return ReportError(CStrVector("Unterminated group"));
185 } 188 }
(...skipping 11 matching lines...) Expand all
197 // regexp atom. 200 // regexp atom.
198 RegExpTree* body = builder->ToRegExp(); 201 RegExpTree* body = builder->ToRegExp();
199 202
200 int end_capture_index = captures_started(); 203 int end_capture_index = captures_started();
201 204
202 int capture_index = state->capture_index(); 205 int capture_index = state->capture_index();
203 SubexpressionType group_type = state->group_type(); 206 SubexpressionType group_type = state->group_type();
204 207
205 // Build result of subexpression. 208 // Build result of subexpression.
206 if (group_type == CAPTURE) { 209 if (group_type == CAPTURE) {
210 if (state->IsNamedCapture()) {
211 CreateNamedCaptureAtIndex(state->capture_name(),
212 capture_index CHECK_FAILED);
213 }
207 RegExpCapture* capture = GetCapture(capture_index); 214 RegExpCapture* capture = GetCapture(capture_index);
208 capture->set_body(body); 215 capture->set_body(body);
209 body = capture; 216 body = capture;
210 } else if (group_type != GROUPING) { 217 } else if (group_type != GROUPING) {
211 DCHECK(group_type == POSITIVE_LOOKAROUND || 218 DCHECK(group_type == POSITIVE_LOOKAROUND ||
212 group_type == NEGATIVE_LOOKAROUND); 219 group_type == NEGATIVE_LOOKAROUND);
213 bool is_positive = (group_type == POSITIVE_LOOKAROUND); 220 bool is_positive = (group_type == POSITIVE_LOOKAROUND);
214 body = new (zone()) RegExpLookaround( 221 body = new (zone()) RegExpLookaround(
215 body, is_positive, end_capture_index - capture_index, 222 body, is_positive, end_capture_index - capture_index,
216 capture_index, state->lookaround_type()); 223 capture_index, state->lookaround_type());
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after
261 new (zone()) ZoneList<CharacterRange>(2, zone()); 268 new (zone()) ZoneList<CharacterRange>(2, zone());
262 CharacterRange::AddClassEscape('.', ranges, zone()); 269 CharacterRange::AddClassEscape('.', ranges, zone());
263 RegExpCharacterClass* cc = 270 RegExpCharacterClass* cc =
264 new (zone()) RegExpCharacterClass(ranges, false); 271 new (zone()) RegExpCharacterClass(ranges, false);
265 builder->AddCharacterClass(cc); 272 builder->AddCharacterClass(cc);
266 break; 273 break;
267 } 274 }
268 case '(': { 275 case '(': {
269 SubexpressionType subexpr_type = CAPTURE; 276 SubexpressionType subexpr_type = CAPTURE;
270 RegExpLookaround::Type lookaround_type = state->lookaround_type(); 277 RegExpLookaround::Type lookaround_type = state->lookaround_type();
278 bool is_named_capture = false;
271 Advance(); 279 Advance();
272 if (current() == '?') { 280 if (current() == '?') {
273 switch (Next()) { 281 switch (Next()) {
274 case ':': 282 case ':':
275 subexpr_type = GROUPING; 283 subexpr_type = GROUPING;
284 Advance(2);
276 break; 285 break;
277 case '=': 286 case '=':
278 lookaround_type = RegExpLookaround::LOOKAHEAD; 287 lookaround_type = RegExpLookaround::LOOKAHEAD;
279 subexpr_type = POSITIVE_LOOKAROUND; 288 subexpr_type = POSITIVE_LOOKAROUND;
289 Advance(2);
280 break; 290 break;
281 case '!': 291 case '!':
282 lookaround_type = RegExpLookaround::LOOKAHEAD; 292 lookaround_type = RegExpLookaround::LOOKAHEAD;
283 subexpr_type = NEGATIVE_LOOKAROUND; 293 subexpr_type = NEGATIVE_LOOKAROUND;
294 Advance(2);
284 break; 295 break;
285 case '<': 296 case '<':
297 Advance();
286 if (FLAG_harmony_regexp_lookbehind) { 298 if (FLAG_harmony_regexp_lookbehind) {
287 Advance();
288 lookaround_type = RegExpLookaround::LOOKBEHIND;
289 if (Next() == '=') { 299 if (Next() == '=') {
290 subexpr_type = POSITIVE_LOOKAROUND; 300 subexpr_type = POSITIVE_LOOKAROUND;
301 lookaround_type = RegExpLookaround::LOOKBEHIND;
302 Advance(2);
291 break; 303 break;
292 } else if (Next() == '!') { 304 } else if (Next() == '!') {
293 subexpr_type = NEGATIVE_LOOKAROUND; 305 subexpr_type = NEGATIVE_LOOKAROUND;
306 lookaround_type = RegExpLookaround::LOOKBEHIND;
307 Advance(2);
294 break; 308 break;
295 } 309 }
296 } 310 }
311 if (FLAG_harmony_regexp_named_captures && unicode()) {
312 is_named_capture = true;
313 Advance();
314 break;
315 }
297 // Fall through. 316 // Fall through.
298 default: 317 default:
299 return ReportError(CStrVector("Invalid group")); 318 return ReportError(CStrVector("Invalid group"));
300 } 319 }
301 Advance(2); 320 }
302 } else { 321
322 const ZoneVector<uc16>* capture_name = nullptr;
323 if (subexpr_type == CAPTURE) {
303 if (captures_started_ >= kMaxCaptures) { 324 if (captures_started_ >= kMaxCaptures) {
304 return ReportError(CStrVector("Too many captures")); 325 return ReportError(CStrVector("Too many captures"));
305 } 326 }
306 captures_started_++; 327 captures_started_++;
328
329 if (is_named_capture) {
330 capture_name = ParseCaptureGroupName(CHECK_FAILED);
331 }
307 } 332 }
308 // Store current state and begin new disjunction parsing. 333 // Store current state and begin new disjunction parsing.
309 state = new (zone()) RegExpParserState( 334 state = new (zone()) RegExpParserState(
310 state, subexpr_type, lookaround_type, captures_started_, 335 state, subexpr_type, lookaround_type, captures_started_,
311 ignore_case(), unicode(), zone()); 336 capture_name, ignore_case(), unicode(), zone());
312 builder = state->builder(); 337 builder = state->builder();
313 continue; 338 continue;
314 } 339 }
315 case '[': { 340 case '[': {
316 RegExpTree* cc = ParseCharacterClass(CHECK_FAILED); 341 RegExpTree* cc = ParseCharacterClass(CHECK_FAILED);
317 builder->AddCharacterClass(cc->AsCharacterClass()); 342 builder->AddCharacterClass(cc->AsCharacterClass());
318 break; 343 break;
319 } 344 }
320 // Atom :: 345 // Atom ::
321 // \ AtomEscape 346 // \ AtomEscape
(...skipping 87 matching lines...) Expand 10 before | Expand all | Expand 10 after
409 if (unicode()) { 434 if (unicode()) {
410 return ReportError(CStrVector("Invalid escape")); 435 return ReportError(CStrVector("Invalid escape"));
411 } 436 }
412 uc32 first_digit = Next(); 437 uc32 first_digit = Next();
413 if (first_digit == '8' || first_digit == '9') { 438 if (first_digit == '8' || first_digit == '9') {
414 builder->AddCharacter(first_digit); 439 builder->AddCharacter(first_digit);
415 Advance(2); 440 Advance(2);
416 break; 441 break;
417 } 442 }
418 } 443 }
419 // FALLTHROUGH 444 // Fall through.
420 case '0': { 445 case '0': {
421 Advance(); 446 Advance();
422 if (unicode() && Next() >= '0' && Next() <= '9') { 447 if (unicode() && Next() >= '0' && Next() <= '9') {
423 // With /u, decimal escape with leading 0 are not parsed as octal. 448 // With /u, decimal escape with leading 0 are not parsed as octal.
424 return ReportError(CStrVector("Invalid decimal escape")); 449 return ReportError(CStrVector("Invalid decimal escape"));
425 } 450 }
426 uc32 octal = ParseOctalLiteral(); 451 uc32 octal = ParseOctalLiteral();
427 builder->AddCharacter(octal); 452 builder->AddCharacter(octal);
428 break; 453 break;
429 } 454 }
(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after
490 if (ParseUnicodeEscape(&value)) { 515 if (ParseUnicodeEscape(&value)) {
491 builder->AddEscapedUnicodeCharacter(value); 516 builder->AddEscapedUnicodeCharacter(value);
492 } else if (!unicode()) { 517 } else if (!unicode()) {
493 builder->AddCharacter('u'); 518 builder->AddCharacter('u');
494 } else { 519 } else {
495 // With /u, invalid escapes are not treated as identity escapes. 520 // With /u, invalid escapes are not treated as identity escapes.
496 return ReportError(CStrVector("Invalid unicode escape")); 521 return ReportError(CStrVector("Invalid unicode escape"));
497 } 522 }
498 break; 523 break;
499 } 524 }
525 case 'k':
526 if (FLAG_harmony_regexp_named_captures && unicode()) {
527 Advance(2);
528 ParseNamedBackReference(builder, state CHECK_FAILED);
529 break;
530 }
531 // Fall through.
500 default: 532 default:
501 Advance(); 533 Advance();
502 // With /u, no identity escapes except for syntax characters 534 // With /u, no identity escapes except for syntax characters
503 // are allowed. Otherwise, all identity escapes are allowed. 535 // are allowed. Otherwise, all identity escapes are allowed.
504 if (!unicode() || IsSyntaxCharacterOrSlash(current())) { 536 if (!unicode() || IsSyntaxCharacterOrSlash(current())) {
505 builder->AddCharacter(current()); 537 builder->AddCharacter(current());
506 Advance(); 538 Advance();
507 } else { 539 } else {
508 return ReportError(CStrVector("Invalid escape")); 540 return ReportError(CStrVector("Invalid escape"));
509 } 541 }
510 break; 542 break;
511 } 543 }
512 break; 544 break;
513 case '{': { 545 case '{': {
514 int dummy; 546 int dummy;
515 bool parsed = ParseIntervalQuantifier(&dummy, &dummy CHECK_FAILED); 547 bool parsed = ParseIntervalQuantifier(&dummy, &dummy CHECK_FAILED);
516 if (parsed) return ReportError(CStrVector("Nothing to repeat")); 548 if (parsed) return ReportError(CStrVector("Nothing to repeat"));
517 // fallthrough 549 // Fall through.
518 } 550 }
519 case '}': 551 case '}':
520 case ']': 552 case ']':
521 if (unicode()) { 553 if (unicode()) {
522 return ReportError(CStrVector("Lone quantifier brackets")); 554 return ReportError(CStrVector("Lone quantifier brackets"));
523 } 555 }
524 // fallthrough 556 // Fall through.
525 default: 557 default:
526 builder->AddUnicodeCharacter(current()); 558 builder->AddUnicodeCharacter(current());
527 Advance(); 559 Advance();
528 break; 560 break;
529 } // end switch(current()) 561 } // end switch(current())
530 562
531 int min; 563 int min;
532 int max; 564 int max;
533 switch (current()) { 565 switch (current()) {
534 // QuantifierPrefix :: 566 // QuantifierPrefix ::
(...skipping 133 matching lines...) Expand 10 before | Expand all | Expand 10 after
668 } 700 }
669 if (value > capture_count_) { 701 if (value > capture_count_) {
670 Reset(start); 702 Reset(start);
671 return false; 703 return false;
672 } 704 }
673 } 705 }
674 *index_out = value; 706 *index_out = value;
675 return true; 707 return true;
676 } 708 }
677 709
// Appends |code_unit| to |v| as UTF-16. A value no larger than
// unibrow::Utf16::kMaxNonSurrogateCharCode is pushed as a single element;
// any larger value is pushed as two elements, the results of
// unibrow::Utf16::LeadSurrogate() and unibrow::Utf16::TrailSurrogate().
710 static void push_code_unit(ZoneVector<uc16>* v, uint32_t code_unit) {
711 if (code_unit <= unibrow::Utf16::kMaxNonSurrogateCharCode) {
712 v->push_back(code_unit);
713 } else {
714 v->push_back(unibrow::Utf16::LeadSurrogate(code_unit));
715 v->push_back(unibrow::Utf16::TrailSurrogate(code_unit));
716 }
717 }
718
// Reads a capture group name starting at the current input position and
// returns it as a zone-allocated vector of UTF-16 code units.
//
// Characters are consumed one at a time until a '>' is read; the '>' is
// consumed but not stored. The first character must satisfy
// IdentifierStart::Is() and every later one IdentifierPart::Is().
// A backslash followed by 'u' is decoded with ParseUnicodeEscape().
//
// On any violation an error is reported through ReportError() and nullptr is
// returned.
//
// NOTE(review): the '>' test runs after escape decoding, so an escape that
// ParseUnicodeEscape() decodes to '>' would also end the name - confirm that
// this is intended.
719 const ZoneVector<uc16>* RegExpParser::ParseCaptureGroupName() {
720 DCHECK(FLAG_harmony_regexp_named_captures);
721 DCHECK(unicode());
722
// Placement-new the vector into memory obtained from the zone.
723 ZoneVector<uc16>* name =
724 new (zone()->New(sizeof(ZoneVector<uc16>))) ZoneVector<uc16>(zone());
725
726 bool at_start = true;
727 while (true) {
// Fetch the current character and step past it before classifying it.
728 uc32 c = current();
729 Advance();
730
731 // Convert unicode escapes.
732 if (c == '\\' && current() == 'u') {
733 Advance();
734 if (!ParseUnicodeEscape(&c)) {
735 ReportError(CStrVector("Invalid Unicode escape sequence"));
736 return nullptr;
737 }
738 }
739
740 if (at_start) {
// An immediate '>' fails this check unless IdentifierStart accepts it, so
// empty names are rejected here.
741 if (!IdentifierStart::Is(c)) {
742 ReportError(CStrVector("Invalid capture group name"));
743 return nullptr;
744 }
745 push_code_unit(name, c);
746 at_start = false;
747 } else {
748 if (c == '>') {
749 break;
750 } else if (IdentifierPart::Is(c)) {
751 push_code_unit(name, c);
752 } else {
753 ReportError(CStrVector("Invalid capture group name"));
754 return nullptr;
755 }
756 }
757 }
758
759 return name;
760 }
761
// Attaches |name| to the capture obtained from GetCapture(|index|) and
// appends that capture to named_captures_, creating the list on first use.
//
// If a capture already in named_captures_ has an equal name, reports
// "Duplicate capture group name" and returns false without modifying
// anything. Returns true otherwise.
762 bool RegExpParser::CreateNamedCaptureAtIndex(const ZoneVector<uc16>* name,
763 int index) {
764 DCHECK(FLAG_harmony_regexp_named_captures);
765 DCHECK(unicode());
766 DCHECK(0 < index && index <= captures_started_);
767 DCHECK_NOT_NULL(name);
768
769 if (named_captures_ == nullptr) {
770 named_captures_ = new (zone()) ZoneList<RegExpCapture*>(1, zone());
771 } else {
772 // Check for duplicates and bail if we find any.
// Names are compared by content (dereferenced vectors), not by pointer.
773 for (const auto& named_capture : *named_captures_) {
774 if (*named_capture->name() == *name) {
775 ReportError(CStrVector("Duplicate capture group name"));
776 return false;
777 }
778 }
779 }
780
781 RegExpCapture* capture = GetCapture(index);
782 DCHECK(capture->name() == nullptr);
783
784 capture->set_name(name);
785 named_captures_->Add(capture, zone());
786
787 return true;
788 }
789
// Parses the "<name>" part of a \k<name> back reference and adds the result
// to |builder|.
//
// Returns false after reporting "Invalid named reference" when the current
// character is not '<', and false when ParseCaptureGroupName() returns
// nullptr. Returns true otherwise.
//
// When |state| reports that parsing is currently inside a capture group with
// this name, an empty term is added. Otherwise a RegExpBackReference that
// carries only the name is added and also recorded in
// named_back_references_; PatchNamedBackReferences() later assigns the
// capture to every reference recorded there.
790 bool RegExpParser::ParseNamedBackReference(RegExpBuilder* builder,
791 RegExpParserState* state) {
792 // The parser is assumed to be on the '<' in \k<name>.
793 if (current() != '<') {
794 ReportError(CStrVector("Invalid named reference"));
795 return false;
796 }
797
798 Advance();
799 const ZoneVector<uc16>* name = ParseCaptureGroupName();
800 if (name == nullptr) {
801 return false;
802 }
803
804 if (state->IsInsideCaptureGroup(name)) {
805 builder->AddEmpty();
806 } else {
807 RegExpBackReference* atom = new (zone()) RegExpBackReference();
808 atom->set_name(name);
809
810 builder->AddAtom(atom);
811
// The list of pending references is created lazily on first use.
812 if (named_back_references_ == nullptr) {
813 named_back_references_ =
814 new (zone()) ZoneList<RegExpBackReference*>(1, zone());
815 }
816 named_back_references_->Add(atom, zone());
817 }
818
819 return true;
820 }
821
// Resolves every back reference stored in named_back_references_ to the
// capture in named_captures_ that has an equal name, and stores that capture
// on the reference via set_capture().
//
// Does nothing when no named back references were recorded. Reports
// "Invalid named capture referenced" and stops at the first reference whose
// name matches no named capture (including the case where there are no named
// captures at all).
822 void RegExpParser::PatchNamedBackReferences() {
823 if (named_back_references_ == nullptr) return;
824
825 if (named_captures_ == nullptr) {
826 ReportError(CStrVector("Invalid named capture referenced"));
827 return;
828 }
829
830 // Look up and patch the actual capture for each named back reference.
831 // TODO(jgruber): O(n^2), optimize if necessary.
832
833 for (int i = 0; i < named_back_references_->length(); i++) {
834 RegExpBackReference* ref = named_back_references_->at(i);
835
// -1 marks "no capture with this name found".
836 int index = -1;
837 for (const auto& capture : *named_captures_) {
838 if (*capture->name() == *ref->name()) {
839 index = capture->index();
840 break;
841 }
842 }
843
844 if (index == -1) {
845 ReportError(CStrVector("Invalid named capture referenced"));
846 return;
847 }
848
849 ref->set_capture(GetCapture(index));
850 }
851 }
678 852
// Returns the RegExpCapture for the one-based |index|. The captures_ list is
// created on first use and extended with new RegExpCapture objects until it
// holds as many entries as there are known captures: capture_count_ when
// is_scanned_for_captures_ is set, captures_started_ otherwise.
679 RegExpCapture* RegExpParser::GetCapture(int index) { 853 RegExpCapture* RegExpParser::GetCapture(int index) {
680 // The index for the capture groups are one-based. Its index in the list is 854 // The index for the capture groups are one-based. Its index in the list is
681 // zero-based. 855 // zero-based.
682 int know_captures = 856 int know_captures =
683 is_scanned_for_captures_ ? capture_count_ : captures_started_; 857 is_scanned_for_captures_ ? capture_count_ : captures_started_;
684 DCHECK(index <= know_captures); 858 DCHECK(index <= know_captures);
685 if (captures_ == NULL) { 859 if (captures_ == NULL) {
686 captures_ = new (zone()) ZoneList<RegExpCapture*>(know_captures, zone()); 860 captures_ = new (zone()) ZoneList<RegExpCapture*>(know_captures, zone());
687 } 861 }
688 while (captures_->length() < know_captures) { 862 while (captures_->length() < know_captures) {
689 captures_->Add(new (zone()) RegExpCapture(captures_->length() + 1), zone()); 863 captures_->Add(new (zone()) RegExpCapture(captures_->length() + 1), zone());
690 } 864 }
691 return captures_->at(index - 1); 865 return captures_->at(index - 1);
692 } 866 }
693 867
// Builds a FixedArray describing all named captures as consecutive pairs:
// slot 2*i holds the name of the i-th named capture as a string and slot
// 2*i + 1 holds its capture index as a Smi. Entries follow the order of
// named_captures_.
//
// Returns a default-constructed Handle<FixedArray> when there are no named
// captures.
868 Handle<FixedArray> RegExpParser::CreateCaptureNameMap() {
869 if (named_captures_ == nullptr || named_captures_->is_empty())
870 return Handle<FixedArray>();
871
872 Factory* factory = isolate()->factory();
873
// Two slots per named capture: name and index.
874 int len = named_captures_->length() * 2;
875 Handle<FixedArray> array = factory->NewFixedArray(len);
876
877 for (int i = 0; i < named_captures_->length(); i++) {
878 RegExpCapture* capture = named_captures_->at(i);
879 MaybeHandle<String> name = factory->NewStringFromTwoByte(capture->name());
880 array->set(i * 2, *name.ToHandleChecked());
881 array->set(i * 2 + 1, Smi::FromInt(capture->index()));
882 }
883
884 return array;
885 }
694 886
// Walks from this state outwards through previous_state() and returns true
// if a CAPTURE state with capture index |index| is found. States of other
// group types are skipped. The walk stops with false as soon as a CAPTURE
// state with a smaller capture index is reached.
695 bool RegExpParser::RegExpParserState::IsInsideCaptureGroup(int index) { 887 bool RegExpParser::RegExpParserState::IsInsideCaptureGroup(int index) {
696 for (RegExpParserState* s = this; s != NULL; s = s->previous_state()) { 888 for (RegExpParserState* s = this; s != NULL; s = s->previous_state()) {
697 if (s->group_type() != CAPTURE) continue; 889 if (s->group_type() != CAPTURE) continue;
698 // Return true if we found the matching capture index. 890 // Return true if we found the matching capture index.
699 if (index == s->capture_index()) return true; 891 if (index == s->capture_index()) return true;
700 // Abort if index is larger than what has been parsed up till this state. 892 // Abort if index is larger than what has been parsed up till this state.
701 if (index > s->capture_index()) return false; 893 if (index > s->capture_index()) return false;
702 } 894 }
703 return false; 895 return false;
704 } 896 }
705 897
// Name-based overload: walks from this state outwards through
// previous_state() and returns true if any state has a capture name whose
// contents equal |name|. States without a capture name are skipped; unlike
// the index-based overload, the walk always continues to the outermost state.
898 bool RegExpParser::RegExpParserState::IsInsideCaptureGroup(
899 const ZoneVector<uc16>* name) {
900 DCHECK_NOT_NULL(name);
901 for (RegExpParserState* s = this; s != NULL; s = s->previous_state()) {
902 if (s->capture_name() == nullptr) continue;
903 if (*s->capture_name() == *name) return true;
904 }
905 return false;
906 }
706 907
707 // QuantifierPrefix :: 908 // QuantifierPrefix ::
708 // { DecimalDigits } 909 // { DecimalDigits }
709 // { DecimalDigits , } 910 // { DecimalDigits , }
710 // { DecimalDigits , DecimalDigits } 911 // { DecimalDigits , DecimalDigits }
711 // 912 //
712 // Returns true if parsing succeeds, and set the min_out and max_out 913 // Returns true if parsing succeeds, and set the min_out and max_out
713 // values. Values are truncated to RegExpTree::kInfinity if they overflow. 914 // values. Values are truncated to RegExpTree::kInfinity if they overflow.
714 bool RegExpParser::ParseIntervalQuantifier(int* min_out, int* max_out) { 915 bool RegExpParser::ParseIntervalQuantifier(int* min_out, int* max_out) {
715 DCHECK_EQ(current(), '{'); 916 DCHECK_EQ(current(), '{');
(...skipping 412 matching lines...) Expand 10 before | Expand all | Expand 10 after
1128 default: 1329 default:
1129 first = ParseClassCharacterEscape(CHECK_FAILED); 1330 first = ParseClassCharacterEscape(CHECK_FAILED);
1130 } 1331 }
1131 } else { 1332 } else {
1132 Advance(); 1333 Advance();
1133 } 1334 }
1134 1335
1135 return CharacterRange::Singleton(first); 1336 return CharacterRange::Singleton(first);
1136 } 1337 }
1137 1338
1138
1139 static const uc16 kNoCharClass = 0; 1339 static const uc16 kNoCharClass = 0;
1140 1340
1141 // Adds range or pre-defined character class to character ranges. 1341 // Adds range or pre-defined character class to character ranges.
1142 // If char_class is not kInvalidClass, it's interpreted as a class 1342 // If char_class is not kInvalidClass, it's interpreted as a class
1143 // escape (i.e., 's' means whitespace, from '\s'). 1343 // escape (i.e., 's' means whitespace, from '\s').
1144 static inline void AddRangeOrEscape(ZoneList<CharacterRange>* ranges, 1344 static inline void AddRangeOrEscape(ZoneList<CharacterRange>* ranges,
1145 uc16 char_class, CharacterRange range, 1345 uc16 char_class, CharacterRange range,
1146 Zone* zone) { 1346 Zone* zone) {
1147 if (char_class != kNoCharClass) { 1347 if (char_class != kNoCharClass) {
1148 CharacterRange::AddClassEscape(char_class, ranges, zone); 1348 CharacterRange::AddClassEscape(char_class, ranges, zone);
(...skipping 112 matching lines...) Expand 10 before | Expand all | Expand 10 after
1261 DCHECK(result->error.is_null()); 1461 DCHECK(result->error.is_null());
1262 if (FLAG_trace_regexp_parser) { 1462 if (FLAG_trace_regexp_parser) {
1263 OFStream os(stdout); 1463 OFStream os(stdout);
1264 tree->Print(os, zone); 1464 tree->Print(os, zone);
1265 os << "\n"; 1465 os << "\n";
1266 } 1466 }
1267 result->tree = tree; 1467 result->tree = tree;
1268 int capture_count = parser.captures_started(); 1468 int capture_count = parser.captures_started();
1269 result->simple = tree->IsAtom() && parser.simple() && capture_count == 0; 1469 result->simple = tree->IsAtom() && parser.simple() && capture_count == 0;
1270 result->contains_anchor = parser.contains_anchor(); 1470 result->contains_anchor = parser.contains_anchor();
1471 result->capture_name_map = parser.CreateCaptureNameMap();
1271 result->capture_count = capture_count; 1472 result->capture_count = capture_count;
1272 } 1473 }
1273 return !parser.failed(); 1474 return !parser.failed();
1274 } 1475 }
1275 1476
1276 RegExpBuilder::RegExpBuilder(Zone* zone, bool ignore_case, bool unicode) 1477 RegExpBuilder::RegExpBuilder(Zone* zone, bool ignore_case, bool unicode)
1277 : zone_(zone), 1478 : zone_(zone),
1278 pending_empty_(false), 1479 pending_empty_(false),
1279 ignore_case_(ignore_case), 1480 ignore_case_(ignore_case),
1280 unicode_(unicode), 1481 unicode_(unicode),
(...skipping 276 matching lines...) Expand 10 before | Expand all | Expand 10 after
1557 return false; 1758 return false;
1558 } 1759 }
1559 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), 1760 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom),
1560 zone()); 1761 zone());
1561 LAST(ADD_TERM); 1762 LAST(ADD_TERM);
1562 return true; 1763 return true;
1563 } 1764 }
1564 1765
1565 } // namespace internal 1766 } // namespace internal
1566 } // namespace v8 1767 } // namespace v8
OLDNEW
« no previous file with comments | « src/regexp/regexp-parser.h ('k') | test/cctest/test-regexp.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698