OLD | NEW |
1 // Copyright 2016 the V8 project authors. All rights reserved. | 1 // Copyright 2016 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "src/regexp/regexp-parser.h" | 5 #include "src/regexp/regexp-parser.h" |
6 | 6 |
7 #include "src/char-predicates-inl.h" | 7 #include "src/char-predicates-inl.h" |
8 #include "src/factory.h" | 8 #include "src/factory.h" |
9 #include "src/isolate.h" | 9 #include "src/isolate.h" |
10 #include "src/objects-inl.h" | 10 #include "src/objects-inl.h" |
11 #include "src/ostreams.h" | 11 #include "src/ostreams.h" |
12 #include "src/regexp/jsregexp.h" | 12 #include "src/regexp/jsregexp.h" |
13 #include "src/utils.h" | 13 #include "src/utils.h" |
14 | 14 |
15 #ifdef V8_I18N_SUPPORT | 15 #ifdef V8_I18N_SUPPORT |
16 #include "unicode/uset.h" | 16 #include "unicode/uset.h" |
17 #endif // V8_I18N_SUPPORT | 17 #endif // V8_I18N_SUPPORT |
18 | 18 |
19 namespace v8 { | 19 namespace v8 { |
20 namespace internal { | 20 namespace internal { |
21 | 21 |
22 RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error, | 22 RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error, |
23 JSRegExp::Flags flags, Isolate* isolate, Zone* zone) | 23 JSRegExp::Flags flags, Isolate* isolate, Zone* zone) |
24 : isolate_(isolate), | 24 : isolate_(isolate), |
25 zone_(zone), | 25 zone_(zone), |
26 error_(error), | 26 error_(error), |
27 captures_(NULL), | 27 captures_(NULL), |
| 28 named_captures_(NULL), |
| 29 named_back_references_(NULL), |
28 in_(in), | 30 in_(in), |
29 current_(kEndMarker), | 31 current_(kEndMarker), |
30 ignore_case_(flags & JSRegExp::kIgnoreCase), | 32 ignore_case_(flags & JSRegExp::kIgnoreCase), |
31 multiline_(flags & JSRegExp::kMultiline), | 33 multiline_(flags & JSRegExp::kMultiline), |
32 unicode_(flags & JSRegExp::kUnicode), | 34 unicode_(flags & JSRegExp::kUnicode), |
33 next_pos_(0), | 35 next_pos_(0), |
34 captures_started_(0), | 36 captures_started_(0), |
35 capture_count_(0), | 37 capture_count_(0), |
36 has_more_(true), | 38 has_more_(true), |
37 simple_(false), | 39 simple_(false), |
(...skipping 104 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
142 | 144 |
// Intended to be written as the trailing part of a call's argument list, e.g.
// Foo(CHECK_FAILED) or Foo(arg CHECK_FAILED). The expansion closes the call,
// returns NULL from the enclosing function if failed_ is set, and leaves an
// open "((void)0" that is closed by the call's own ")".
#define CHECK_FAILED /**/); \
  if (failed_) return NULL; \
  ((void)0
146 | 148 |
147 | 149 |
148 // Pattern :: | 150 // Pattern :: |
149 // Disjunction | 151 // Disjunction |
150 RegExpTree* RegExpParser::ParsePattern() { | 152 RegExpTree* RegExpParser::ParsePattern() { |
151 RegExpTree* result = ParseDisjunction(CHECK_FAILED); | 153 RegExpTree* result = ParseDisjunction(CHECK_FAILED); |
| 154 PatchNamedBackReferences(CHECK_FAILED); |
152 DCHECK(!has_more()); | 155 DCHECK(!has_more()); |
153 // If the result of parsing is a literal string atom, and it has the | 156 // If the result of parsing is a literal string atom, and it has the |
154 // same length as the input, then the atom is identical to the input. | 157 // same length as the input, then the atom is identical to the input. |
155 if (result->IsAtom() && result->AsAtom()->length() == in()->length()) { | 158 if (result->IsAtom() && result->AsAtom()->length() == in()->length()) { |
156 simple_ = true; | 159 simple_ = true; |
157 } | 160 } |
158 return result; | 161 return result; |
159 } | 162 } |
160 | 163 |
161 | 164 |
162 // Disjunction :: | 165 // Disjunction :: |
163 // Alternative | 166 // Alternative |
164 // Alternative | Disjunction | 167 // Alternative | Disjunction |
165 // Alternative :: | 168 // Alternative :: |
166 // [empty] | 169 // [empty] |
167 // Term Alternative | 170 // Term Alternative |
168 // Term :: | 171 // Term :: |
169 // Assertion | 172 // Assertion |
170 // Atom | 173 // Atom |
171 // Atom Quantifier | 174 // Atom Quantifier |
172 RegExpTree* RegExpParser::ParseDisjunction() { | 175 RegExpTree* RegExpParser::ParseDisjunction() { |
173 // Used to store current state while parsing subexpressions. | 176 // Used to store current state while parsing subexpressions. |
174 RegExpParserState initial_state(NULL, INITIAL, RegExpLookaround::LOOKAHEAD, 0, | 177 RegExpParserState initial_state(NULL, INITIAL, RegExpLookaround::LOOKAHEAD, 0, |
175 ignore_case(), unicode(), zone()); | 178 nullptr, ignore_case(), unicode(), zone()); |
176 RegExpParserState* state = &initial_state; | 179 RegExpParserState* state = &initial_state; |
177 // Cache the builder in a local variable for quick access. | 180 // Cache the builder in a local variable for quick access. |
178 RegExpBuilder* builder = initial_state.builder(); | 181 RegExpBuilder* builder = initial_state.builder(); |
179 while (true) { | 182 while (true) { |
180 switch (current()) { | 183 switch (current()) { |
181 case kEndMarker: | 184 case kEndMarker: |
182 if (state->IsSubexpression()) { | 185 if (state->IsSubexpression()) { |
183 // Inside a parenthesized group when hitting end of input. | 186 // Inside a parenthesized group when hitting end of input. |
184 return ReportError(CStrVector("Unterminated group")); | 187 return ReportError(CStrVector("Unterminated group")); |
185 } | 188 } |
(...skipping 11 matching lines...) Expand all Loading... |
197 // regexp atom. | 200 // regexp atom. |
198 RegExpTree* body = builder->ToRegExp(); | 201 RegExpTree* body = builder->ToRegExp(); |
199 | 202 |
200 int end_capture_index = captures_started(); | 203 int end_capture_index = captures_started(); |
201 | 204 |
202 int capture_index = state->capture_index(); | 205 int capture_index = state->capture_index(); |
203 SubexpressionType group_type = state->group_type(); | 206 SubexpressionType group_type = state->group_type(); |
204 | 207 |
205 // Build result of subexpression. | 208 // Build result of subexpression. |
206 if (group_type == CAPTURE) { | 209 if (group_type == CAPTURE) { |
| 210 if (state->IsNamedCapture()) { |
| 211 CreateNamedCaptureAtIndex(state->capture_name(), |
| 212 capture_index CHECK_FAILED); |
| 213 } |
207 RegExpCapture* capture = GetCapture(capture_index); | 214 RegExpCapture* capture = GetCapture(capture_index); |
208 capture->set_body(body); | 215 capture->set_body(body); |
209 body = capture; | 216 body = capture; |
210 } else if (group_type != GROUPING) { | 217 } else if (group_type != GROUPING) { |
211 DCHECK(group_type == POSITIVE_LOOKAROUND || | 218 DCHECK(group_type == POSITIVE_LOOKAROUND || |
212 group_type == NEGATIVE_LOOKAROUND); | 219 group_type == NEGATIVE_LOOKAROUND); |
213 bool is_positive = (group_type == POSITIVE_LOOKAROUND); | 220 bool is_positive = (group_type == POSITIVE_LOOKAROUND); |
214 body = new (zone()) RegExpLookaround( | 221 body = new (zone()) RegExpLookaround( |
215 body, is_positive, end_capture_index - capture_index, | 222 body, is_positive, end_capture_index - capture_index, |
216 capture_index, state->lookaround_type()); | 223 capture_index, state->lookaround_type()); |
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
261 new (zone()) ZoneList<CharacterRange>(2, zone()); | 268 new (zone()) ZoneList<CharacterRange>(2, zone()); |
262 CharacterRange::AddClassEscape('.', ranges, zone()); | 269 CharacterRange::AddClassEscape('.', ranges, zone()); |
263 RegExpCharacterClass* cc = | 270 RegExpCharacterClass* cc = |
264 new (zone()) RegExpCharacterClass(ranges, false); | 271 new (zone()) RegExpCharacterClass(ranges, false); |
265 builder->AddCharacterClass(cc); | 272 builder->AddCharacterClass(cc); |
266 break; | 273 break; |
267 } | 274 } |
268 case '(': { | 275 case '(': { |
269 SubexpressionType subexpr_type = CAPTURE; | 276 SubexpressionType subexpr_type = CAPTURE; |
270 RegExpLookaround::Type lookaround_type = state->lookaround_type(); | 277 RegExpLookaround::Type lookaround_type = state->lookaround_type(); |
| 278 bool is_named_capture = false; |
271 Advance(); | 279 Advance(); |
272 if (current() == '?') { | 280 if (current() == '?') { |
273 switch (Next()) { | 281 switch (Next()) { |
274 case ':': | 282 case ':': |
275 subexpr_type = GROUPING; | 283 subexpr_type = GROUPING; |
| 284 Advance(2); |
276 break; | 285 break; |
277 case '=': | 286 case '=': |
278 lookaround_type = RegExpLookaround::LOOKAHEAD; | 287 lookaround_type = RegExpLookaround::LOOKAHEAD; |
279 subexpr_type = POSITIVE_LOOKAROUND; | 288 subexpr_type = POSITIVE_LOOKAROUND; |
| 289 Advance(2); |
280 break; | 290 break; |
281 case '!': | 291 case '!': |
282 lookaround_type = RegExpLookaround::LOOKAHEAD; | 292 lookaround_type = RegExpLookaround::LOOKAHEAD; |
283 subexpr_type = NEGATIVE_LOOKAROUND; | 293 subexpr_type = NEGATIVE_LOOKAROUND; |
| 294 Advance(2); |
284 break; | 295 break; |
285 case '<': | 296 case '<': |
| 297 Advance(); |
286 if (FLAG_harmony_regexp_lookbehind) { | 298 if (FLAG_harmony_regexp_lookbehind) { |
287 Advance(); | |
288 lookaround_type = RegExpLookaround::LOOKBEHIND; | |
289 if (Next() == '=') { | 299 if (Next() == '=') { |
290 subexpr_type = POSITIVE_LOOKAROUND; | 300 subexpr_type = POSITIVE_LOOKAROUND; |
| 301 lookaround_type = RegExpLookaround::LOOKBEHIND; |
| 302 Advance(2); |
291 break; | 303 break; |
292 } else if (Next() == '!') { | 304 } else if (Next() == '!') { |
293 subexpr_type = NEGATIVE_LOOKAROUND; | 305 subexpr_type = NEGATIVE_LOOKAROUND; |
| 306 lookaround_type = RegExpLookaround::LOOKBEHIND; |
| 307 Advance(2); |
294 break; | 308 break; |
295 } | 309 } |
296 } | 310 } |
| 311 if (FLAG_harmony_regexp_named_captures && unicode()) { |
| 312 is_named_capture = true; |
| 313 Advance(); |
| 314 break; |
| 315 } |
297 // Fall through. | 316 // Fall through. |
298 default: | 317 default: |
299 return ReportError(CStrVector("Invalid group")); | 318 return ReportError(CStrVector("Invalid group")); |
300 } | 319 } |
301 Advance(2); | 320 } |
302 } else { | 321 |
| 322 const ZoneVector<uc16>* capture_name = nullptr; |
| 323 if (subexpr_type == CAPTURE) { |
303 if (captures_started_ >= kMaxCaptures) { | 324 if (captures_started_ >= kMaxCaptures) { |
304 return ReportError(CStrVector("Too many captures")); | 325 return ReportError(CStrVector("Too many captures")); |
305 } | 326 } |
306 captures_started_++; | 327 captures_started_++; |
| 328 |
| 329 if (is_named_capture) { |
| 330 capture_name = ParseCaptureGroupName(CHECK_FAILED); |
| 331 } |
307 } | 332 } |
308 // Store current state and begin new disjunction parsing. | 333 // Store current state and begin new disjunction parsing. |
309 state = new (zone()) RegExpParserState( | 334 state = new (zone()) RegExpParserState( |
310 state, subexpr_type, lookaround_type, captures_started_, | 335 state, subexpr_type, lookaround_type, captures_started_, |
311 ignore_case(), unicode(), zone()); | 336 capture_name, ignore_case(), unicode(), zone()); |
312 builder = state->builder(); | 337 builder = state->builder(); |
313 continue; | 338 continue; |
314 } | 339 } |
315 case '[': { | 340 case '[': { |
316 RegExpTree* cc = ParseCharacterClass(CHECK_FAILED); | 341 RegExpTree* cc = ParseCharacterClass(CHECK_FAILED); |
317 builder->AddCharacterClass(cc->AsCharacterClass()); | 342 builder->AddCharacterClass(cc->AsCharacterClass()); |
318 break; | 343 break; |
319 } | 344 } |
320 // Atom :: | 345 // Atom :: |
321 // \ AtomEscape | 346 // \ AtomEscape |
(...skipping 87 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
409 if (unicode()) { | 434 if (unicode()) { |
410 return ReportError(CStrVector("Invalid escape")); | 435 return ReportError(CStrVector("Invalid escape")); |
411 } | 436 } |
412 uc32 first_digit = Next(); | 437 uc32 first_digit = Next(); |
413 if (first_digit == '8' || first_digit == '9') { | 438 if (first_digit == '8' || first_digit == '9') { |
414 builder->AddCharacter(first_digit); | 439 builder->AddCharacter(first_digit); |
415 Advance(2); | 440 Advance(2); |
416 break; | 441 break; |
417 } | 442 } |
418 } | 443 } |
419 // FALLTHROUGH | 444 // Fall through. |
420 case '0': { | 445 case '0': { |
421 Advance(); | 446 Advance(); |
422 if (unicode() && Next() >= '0' && Next() <= '9') { | 447 if (unicode() && Next() >= '0' && Next() <= '9') { |
423 // With /u, decimal escape with leading 0 are not parsed as octal. | 448 // With /u, decimal escape with leading 0 are not parsed as octal. |
424 return ReportError(CStrVector("Invalid decimal escape")); | 449 return ReportError(CStrVector("Invalid decimal escape")); |
425 } | 450 } |
426 uc32 octal = ParseOctalLiteral(); | 451 uc32 octal = ParseOctalLiteral(); |
427 builder->AddCharacter(octal); | 452 builder->AddCharacter(octal); |
428 break; | 453 break; |
429 } | 454 } |
(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
490 if (ParseUnicodeEscape(&value)) { | 515 if (ParseUnicodeEscape(&value)) { |
491 builder->AddEscapedUnicodeCharacter(value); | 516 builder->AddEscapedUnicodeCharacter(value); |
492 } else if (!unicode()) { | 517 } else if (!unicode()) { |
493 builder->AddCharacter('u'); | 518 builder->AddCharacter('u'); |
494 } else { | 519 } else { |
495 // With /u, invalid escapes are not treated as identity escapes. | 520 // With /u, invalid escapes are not treated as identity escapes. |
496 return ReportError(CStrVector("Invalid unicode escape")); | 521 return ReportError(CStrVector("Invalid unicode escape")); |
497 } | 522 } |
498 break; | 523 break; |
499 } | 524 } |
| 525 case 'k': |
| 526 if (FLAG_harmony_regexp_named_captures && unicode()) { |
| 527 Advance(2); |
| 528 ParseNamedBackReference(builder, state CHECK_FAILED); |
| 529 break; |
| 530 } |
| 531 // Fall through. |
500 default: | 532 default: |
501 Advance(); | 533 Advance(); |
502 // With /u, no identity escapes except for syntax characters | 534 // With /u, no identity escapes except for syntax characters |
503 // are allowed. Otherwise, all identity escapes are allowed. | 535 // are allowed. Otherwise, all identity escapes are allowed. |
504 if (!unicode() || IsSyntaxCharacterOrSlash(current())) { | 536 if (!unicode() || IsSyntaxCharacterOrSlash(current())) { |
505 builder->AddCharacter(current()); | 537 builder->AddCharacter(current()); |
506 Advance(); | 538 Advance(); |
507 } else { | 539 } else { |
508 return ReportError(CStrVector("Invalid escape")); | 540 return ReportError(CStrVector("Invalid escape")); |
509 } | 541 } |
510 break; | 542 break; |
511 } | 543 } |
512 break; | 544 break; |
513 case '{': { | 545 case '{': { |
514 int dummy; | 546 int dummy; |
515 bool parsed = ParseIntervalQuantifier(&dummy, &dummy CHECK_FAILED); | 547 bool parsed = ParseIntervalQuantifier(&dummy, &dummy CHECK_FAILED); |
516 if (parsed) return ReportError(CStrVector("Nothing to repeat")); | 548 if (parsed) return ReportError(CStrVector("Nothing to repeat")); |
517 // fallthrough | 549 // Fall through. |
518 } | 550 } |
519 case '}': | 551 case '}': |
520 case ']': | 552 case ']': |
521 if (unicode()) { | 553 if (unicode()) { |
522 return ReportError(CStrVector("Lone quantifier brackets")); | 554 return ReportError(CStrVector("Lone quantifier brackets")); |
523 } | 555 } |
524 // fallthrough | 556 // Fall through. |
525 default: | 557 default: |
526 builder->AddUnicodeCharacter(current()); | 558 builder->AddUnicodeCharacter(current()); |
527 Advance(); | 559 Advance(); |
528 break; | 560 break; |
529 } // end switch(current()) | 561 } // end switch(current()) |
530 | 562 |
531 int min; | 563 int min; |
532 int max; | 564 int max; |
533 switch (current()) { | 565 switch (current()) { |
534 // QuantifierPrefix :: | 566 // QuantifierPrefix :: |
(...skipping 133 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
668 } | 700 } |
669 if (value > capture_count_) { | 701 if (value > capture_count_) { |
670 Reset(start); | 702 Reset(start); |
671 return false; | 703 return false; |
672 } | 704 } |
673 } | 705 } |
674 *index_out = value; | 706 *index_out = value; |
675 return true; | 707 return true; |
676 } | 708 } |
677 | 709 |
| 710 static void push_code_unit(ZoneVector<uc16>* v, uint32_t code_unit) { |
| 711 if (code_unit <= unibrow::Utf16::kMaxNonSurrogateCharCode) { |
| 712 v->push_back(code_unit); |
| 713 } else { |
| 714 v->push_back(unibrow::Utf16::LeadSurrogate(code_unit)); |
| 715 v->push_back(unibrow::Utf16::TrailSurrogate(code_unit)); |
| 716 } |
| 717 } |
| 718 |
| 719 const ZoneVector<uc16>* RegExpParser::ParseCaptureGroupName() { |
| 720 DCHECK(FLAG_harmony_regexp_named_captures); |
| 721 DCHECK(unicode()); |
| 722 |
| 723 ZoneVector<uc16>* name = |
| 724 new (zone()->New(sizeof(ZoneVector<uc16>))) ZoneVector<uc16>(zone()); |
| 725 |
| 726 bool at_start = true; |
| 727 while (true) { |
| 728 uc32 c = current(); |
| 729 Advance(); |
| 730 |
| 731 // Convert unicode escapes. |
| 732 if (c == '\\' && current() == 'u') { |
| 733 Advance(); |
| 734 if (!ParseUnicodeEscape(&c)) { |
| 735 ReportError(CStrVector("Invalid Unicode escape sequence")); |
| 736 return nullptr; |
| 737 } |
| 738 } |
| 739 |
| 740 if (at_start) { |
| 741 if (!IdentifierStart::Is(c)) { |
| 742 ReportError(CStrVector("Invalid capture group name")); |
| 743 return nullptr; |
| 744 } |
| 745 push_code_unit(name, c); |
| 746 at_start = false; |
| 747 } else { |
| 748 if (c == '>') { |
| 749 break; |
| 750 } else if (IdentifierPart::Is(c)) { |
| 751 push_code_unit(name, c); |
| 752 } else { |
| 753 ReportError(CStrVector("Invalid capture group name")); |
| 754 return nullptr; |
| 755 } |
| 756 } |
| 757 } |
| 758 |
| 759 return name; |
| 760 } |
| 761 |
| 762 bool RegExpParser::CreateNamedCaptureAtIndex(const ZoneVector<uc16>* name, |
| 763 int index) { |
| 764 DCHECK(FLAG_harmony_regexp_named_captures); |
| 765 DCHECK(unicode()); |
| 766 DCHECK(0 < index && index <= captures_started_); |
| 767 DCHECK_NOT_NULL(name); |
| 768 |
| 769 if (named_captures_ == nullptr) { |
| 770 named_captures_ = new (zone()) ZoneList<RegExpCapture*>(1, zone()); |
| 771 } else { |
| 772 // Check for duplicates and bail if we find any. |
| 773 for (const auto& named_capture : *named_captures_) { |
| 774 if (*named_capture->name() == *name) { |
| 775 ReportError(CStrVector("Duplicate capture group name")); |
| 776 return false; |
| 777 } |
| 778 } |
| 779 } |
| 780 |
| 781 RegExpCapture* capture = GetCapture(index); |
| 782 DCHECK(capture->name() == nullptr); |
| 783 |
| 784 capture->set_name(name); |
| 785 named_captures_->Add(capture, zone()); |
| 786 |
| 787 return true; |
| 788 } |
| 789 |
| 790 bool RegExpParser::ParseNamedBackReference(RegExpBuilder* builder, |
| 791 RegExpParserState* state) { |
| 792 // The parser is assumed to be on the '<' in \k<name>. |
| 793 if (current() != '<') { |
| 794 ReportError(CStrVector("Invalid named reference")); |
| 795 return false; |
| 796 } |
| 797 |
| 798 Advance(); |
| 799 const ZoneVector<uc16>* name = ParseCaptureGroupName(); |
| 800 if (name == nullptr) { |
| 801 return false; |
| 802 } |
| 803 |
| 804 if (state->IsInsideCaptureGroup(name)) { |
| 805 builder->AddEmpty(); |
| 806 } else { |
| 807 RegExpBackReference* atom = new (zone()) RegExpBackReference(); |
| 808 atom->set_name(name); |
| 809 |
| 810 builder->AddAtom(atom); |
| 811 |
| 812 if (named_back_references_ == nullptr) { |
| 813 named_back_references_ = |
| 814 new (zone()) ZoneList<RegExpBackReference*>(1, zone()); |
| 815 } |
| 816 named_back_references_->Add(atom, zone()); |
| 817 } |
| 818 |
| 819 return true; |
| 820 } |
| 821 |
| 822 void RegExpParser::PatchNamedBackReferences() { |
| 823 if (named_back_references_ == nullptr) return; |
| 824 |
| 825 if (named_captures_ == nullptr) { |
| 826 ReportError(CStrVector("Invalid named capture referenced")); |
| 827 return; |
| 828 } |
| 829 |
| 830 // Look up and patch the actual capture for each named back reference. |
| 831 // TODO(jgruber): O(n^2), optimize if necessary. |
| 832 |
| 833 for (int i = 0; i < named_back_references_->length(); i++) { |
| 834 RegExpBackReference* ref = named_back_references_->at(i); |
| 835 |
| 836 int index = -1; |
| 837 for (const auto& capture : *named_captures_) { |
| 838 if (*capture->name() == *ref->name()) { |
| 839 index = capture->index(); |
| 840 break; |
| 841 } |
| 842 } |
| 843 |
| 844 if (index == -1) { |
| 845 ReportError(CStrVector("Invalid named capture referenced")); |
| 846 return; |
| 847 } |
| 848 |
| 849 ref->set_capture(GetCapture(index)); |
| 850 } |
| 851 } |
678 | 852 |
679 RegExpCapture* RegExpParser::GetCapture(int index) { | 853 RegExpCapture* RegExpParser::GetCapture(int index) { |
680 // The index for the capture groups are one-based. Its index in the list is | 854 // The index for the capture groups are one-based. Its index in the list is |
681 // zero-based. | 855 // zero-based. |
682 int know_captures = | 856 int know_captures = |
683 is_scanned_for_captures_ ? capture_count_ : captures_started_; | 857 is_scanned_for_captures_ ? capture_count_ : captures_started_; |
684 DCHECK(index <= know_captures); | 858 DCHECK(index <= know_captures); |
685 if (captures_ == NULL) { | 859 if (captures_ == NULL) { |
686 captures_ = new (zone()) ZoneList<RegExpCapture*>(know_captures, zone()); | 860 captures_ = new (zone()) ZoneList<RegExpCapture*>(know_captures, zone()); |
687 } | 861 } |
688 while (captures_->length() < know_captures) { | 862 while (captures_->length() < know_captures) { |
689 captures_->Add(new (zone()) RegExpCapture(captures_->length() + 1), zone()); | 863 captures_->Add(new (zone()) RegExpCapture(captures_->length() + 1), zone()); |
690 } | 864 } |
691 return captures_->at(index - 1); | 865 return captures_->at(index - 1); |
692 } | 866 } |
693 | 867 |
| 868 Handle<FixedArray> RegExpParser::CreateCaptureNameMap() { |
| 869 if (named_captures_ == nullptr || named_captures_->is_empty()) |
| 870 return Handle<FixedArray>(); |
| 871 |
| 872 Factory* factory = isolate()->factory(); |
| 873 |
| 874 int len = named_captures_->length() * 2; |
| 875 Handle<FixedArray> array = factory->NewFixedArray(len); |
| 876 |
| 877 for (int i = 0; i < named_captures_->length(); i++) { |
| 878 RegExpCapture* capture = named_captures_->at(i); |
| 879 MaybeHandle<String> name = factory->NewStringFromTwoByte(capture->name()); |
| 880 array->set(i * 2, *name.ToHandleChecked()); |
| 881 array->set(i * 2 + 1, Smi::FromInt(capture->index())); |
| 882 } |
| 883 |
| 884 return array; |
| 885 } |
694 | 886 |
695 bool RegExpParser::RegExpParserState::IsInsideCaptureGroup(int index) { | 887 bool RegExpParser::RegExpParserState::IsInsideCaptureGroup(int index) { |
696 for (RegExpParserState* s = this; s != NULL; s = s->previous_state()) { | 888 for (RegExpParserState* s = this; s != NULL; s = s->previous_state()) { |
697 if (s->group_type() != CAPTURE) continue; | 889 if (s->group_type() != CAPTURE) continue; |
698 // Return true if we found the matching capture index. | 890 // Return true if we found the matching capture index. |
699 if (index == s->capture_index()) return true; | 891 if (index == s->capture_index()) return true; |
700 // Abort if index is larger than what has been parsed up till this state. | 892 // Abort if index is larger than what has been parsed up till this state. |
701 if (index > s->capture_index()) return false; | 893 if (index > s->capture_index()) return false; |
702 } | 894 } |
703 return false; | 895 return false; |
704 } | 896 } |
705 | 897 |
| 898 bool RegExpParser::RegExpParserState::IsInsideCaptureGroup( |
| 899 const ZoneVector<uc16>* name) { |
| 900 DCHECK_NOT_NULL(name); |
| 901 for (RegExpParserState* s = this; s != NULL; s = s->previous_state()) { |
| 902 if (s->capture_name() == nullptr) continue; |
| 903 if (*s->capture_name() == *name) return true; |
| 904 } |
| 905 return false; |
| 906 } |
706 | 907 |
707 // QuantifierPrefix :: | 908 // QuantifierPrefix :: |
708 // { DecimalDigits } | 909 // { DecimalDigits } |
709 // { DecimalDigits , } | 910 // { DecimalDigits , } |
710 // { DecimalDigits , DecimalDigits } | 911 // { DecimalDigits , DecimalDigits } |
711 // | 912 // |
712 // Returns true if parsing succeeds, and set the min_out and max_out | 913 // Returns true if parsing succeeds, and set the min_out and max_out |
713 // values. Values are truncated to RegExpTree::kInfinity if they overflow. | 914 // values. Values are truncated to RegExpTree::kInfinity if they overflow. |
714 bool RegExpParser::ParseIntervalQuantifier(int* min_out, int* max_out) { | 915 bool RegExpParser::ParseIntervalQuantifier(int* min_out, int* max_out) { |
715 DCHECK_EQ(current(), '{'); | 916 DCHECK_EQ(current(), '{'); |
(...skipping 412 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1128 default: | 1329 default: |
1129 first = ParseClassCharacterEscape(CHECK_FAILED); | 1330 first = ParseClassCharacterEscape(CHECK_FAILED); |
1130 } | 1331 } |
1131 } else { | 1332 } else { |
1132 Advance(); | 1333 Advance(); |
1133 } | 1334 } |
1134 | 1335 |
1135 return CharacterRange::Singleton(first); | 1336 return CharacterRange::Singleton(first); |
1136 } | 1337 } |
1137 | 1338 |
1138 | |
1139 static const uc16 kNoCharClass = 0; | 1339 static const uc16 kNoCharClass = 0; |
1140 | 1340 |
// Adds range or pre-defined character class to character ranges.
// If char_class is not kNoCharClass, it's interpreted as a class
// escape (e.g., 's' means whitespace, from '\s').
1144 static inline void AddRangeOrEscape(ZoneList<CharacterRange>* ranges, | 1344 static inline void AddRangeOrEscape(ZoneList<CharacterRange>* ranges, |
1145 uc16 char_class, CharacterRange range, | 1345 uc16 char_class, CharacterRange range, |
1146 Zone* zone) { | 1346 Zone* zone) { |
1147 if (char_class != kNoCharClass) { | 1347 if (char_class != kNoCharClass) { |
1148 CharacterRange::AddClassEscape(char_class, ranges, zone); | 1348 CharacterRange::AddClassEscape(char_class, ranges, zone); |
(...skipping 112 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1261 DCHECK(result->error.is_null()); | 1461 DCHECK(result->error.is_null()); |
1262 if (FLAG_trace_regexp_parser) { | 1462 if (FLAG_trace_regexp_parser) { |
1263 OFStream os(stdout); | 1463 OFStream os(stdout); |
1264 tree->Print(os, zone); | 1464 tree->Print(os, zone); |
1265 os << "\n"; | 1465 os << "\n"; |
1266 } | 1466 } |
1267 result->tree = tree; | 1467 result->tree = tree; |
1268 int capture_count = parser.captures_started(); | 1468 int capture_count = parser.captures_started(); |
1269 result->simple = tree->IsAtom() && parser.simple() && capture_count == 0; | 1469 result->simple = tree->IsAtom() && parser.simple() && capture_count == 0; |
1270 result->contains_anchor = parser.contains_anchor(); | 1470 result->contains_anchor = parser.contains_anchor(); |
| 1471 result->capture_name_map = parser.CreateCaptureNameMap(); |
1271 result->capture_count = capture_count; | 1472 result->capture_count = capture_count; |
1272 } | 1473 } |
1273 return !parser.failed(); | 1474 return !parser.failed(); |
1274 } | 1475 } |
1275 | 1476 |
1276 RegExpBuilder::RegExpBuilder(Zone* zone, bool ignore_case, bool unicode) | 1477 RegExpBuilder::RegExpBuilder(Zone* zone, bool ignore_case, bool unicode) |
1277 : zone_(zone), | 1478 : zone_(zone), |
1278 pending_empty_(false), | 1479 pending_empty_(false), |
1279 ignore_case_(ignore_case), | 1480 ignore_case_(ignore_case), |
1280 unicode_(unicode), | 1481 unicode_(unicode), |
(...skipping 276 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1557 return false; | 1758 return false; |
1558 } | 1759 } |
1559 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), | 1760 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), |
1560 zone()); | 1761 zone()); |
1561 LAST(ADD_TERM); | 1762 LAST(ADD_TERM); |
1562 return true; | 1763 return true; |
1563 } | 1764 } |
1564 | 1765 |
1565 } // namespace internal | 1766 } // namespace internal |
1566 } // namespace v8 | 1767 } // namespace v8 |
OLD | NEW |