| OLD | NEW |
| 1 // Copyright 2016 the V8 project authors. All rights reserved. | 1 // Copyright 2016 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "src/regexp/regexp-parser.h" | 5 #include "src/regexp/regexp-parser.h" |
| 6 | 6 |
| 7 #include "src/char-predicates-inl.h" | 7 #include "src/char-predicates-inl.h" |
| 8 #include "src/factory.h" | 8 #include "src/factory.h" |
| 9 #include "src/isolate.h" | 9 #include "src/isolate.h" |
| 10 #include "src/objects-inl.h" | 10 #include "src/objects-inl.h" |
| 11 #include "src/ostreams.h" | 11 #include "src/ostreams.h" |
| 12 #include "src/regexp/jsregexp.h" | 12 #include "src/regexp/jsregexp.h" |
| 13 #include "src/utils.h" | 13 #include "src/utils.h" |
| 14 | 14 |
| 15 #ifdef V8_I18N_SUPPORT | 15 #ifdef V8_I18N_SUPPORT |
| 16 #include "unicode/uset.h" | 16 #include "unicode/uset.h" |
| 17 #endif // V8_I18N_SUPPORT | 17 #endif // V8_I18N_SUPPORT |
| 18 | 18 |
| 19 namespace v8 { | 19 namespace v8 { |
| 20 namespace internal { | 20 namespace internal { |
| 21 | 21 |
| 22 RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error, | 22 RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error, |
| 23 JSRegExp::Flags flags, Isolate* isolate, Zone* zone) | 23 JSRegExp::Flags flags, Isolate* isolate, Zone* zone) |
| 24 : isolate_(isolate), | 24 : isolate_(isolate), |
| 25 zone_(zone), | 25 zone_(zone), |
| 26 error_(error), | 26 error_(error), |
| 27 captures_(NULL), | 27 captures_(NULL), |
| 28 named_captures_(NULL), |
| 29 named_back_references_(NULL), |
| 28 in_(in), | 30 in_(in), |
| 29 current_(kEndMarker), | 31 current_(kEndMarker), |
| 30 ignore_case_(flags & JSRegExp::kIgnoreCase), | 32 ignore_case_(flags & JSRegExp::kIgnoreCase), |
| 31 multiline_(flags & JSRegExp::kMultiline), | 33 multiline_(flags & JSRegExp::kMultiline), |
| 32 unicode_(flags & JSRegExp::kUnicode), | 34 unicode_(flags & JSRegExp::kUnicode), |
| 33 next_pos_(0), | 35 next_pos_(0), |
| 34 captures_started_(0), | 36 captures_started_(0), |
| 35 capture_count_(0), | 37 capture_count_(0), |
| 36 has_more_(true), | 38 has_more_(true), |
| 37 simple_(false), | 39 simple_(false), |
| (...skipping 104 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 142 | 144 |
| 143 #define CHECK_FAILED /**/); \ | 145 #define CHECK_FAILED /**/); \ |
| 144 if (failed_) return NULL; \ | 146 if (failed_) return NULL; \ |
| 145 ((void)0 | 147 ((void)0 |
| 146 | 148 |
| 147 | 149 |
| 148 // Pattern :: | 150 // Pattern :: |
| 149 // Disjunction | 151 // Disjunction |
| 150 RegExpTree* RegExpParser::ParsePattern() { | 152 RegExpTree* RegExpParser::ParsePattern() { |
| 151 RegExpTree* result = ParseDisjunction(CHECK_FAILED); | 153 RegExpTree* result = ParseDisjunction(CHECK_FAILED); |
| 154 PatchNamedBackReferences(CHECK_FAILED); |
| 152 DCHECK(!has_more()); | 155 DCHECK(!has_more()); |
| 153 // If the result of parsing is a literal string atom, and it has the | 156 // If the result of parsing is a literal string atom, and it has the |
| 154 // same length as the input, then the atom is identical to the input. | 157 // same length as the input, then the atom is identical to the input. |
| 155 if (result->IsAtom() && result->AsAtom()->length() == in()->length()) { | 158 if (result->IsAtom() && result->AsAtom()->length() == in()->length()) { |
| 156 simple_ = true; | 159 simple_ = true; |
| 157 } | 160 } |
| 158 return result; | 161 return result; |
| 159 } | 162 } |
| 160 | 163 |
| 161 | 164 |
| 162 // Disjunction :: | 165 // Disjunction :: |
| 163 // Alternative | 166 // Alternative |
| 164 // Alternative | Disjunction | 167 // Alternative | Disjunction |
| 165 // Alternative :: | 168 // Alternative :: |
| 166 // [empty] | 169 // [empty] |
| 167 // Term Alternative | 170 // Term Alternative |
| 168 // Term :: | 171 // Term :: |
| 169 // Assertion | 172 // Assertion |
| 170 // Atom | 173 // Atom |
| 171 // Atom Quantifier | 174 // Atom Quantifier |
| 172 RegExpTree* RegExpParser::ParseDisjunction() { | 175 RegExpTree* RegExpParser::ParseDisjunction() { |
| 173 // Used to store current state while parsing subexpressions. | 176 // Used to store current state while parsing subexpressions. |
| 174 RegExpParserState initial_state(NULL, INITIAL, RegExpLookaround::LOOKAHEAD, 0, | 177 RegExpParserState initial_state(NULL, INITIAL, RegExpLookaround::LOOKAHEAD, 0, |
| 175 ignore_case(), unicode(), zone()); | 178 nullptr, ignore_case(), unicode(), zone()); |
| 176 RegExpParserState* state = &initial_state; | 179 RegExpParserState* state = &initial_state; |
| 177 // Cache the builder in a local variable for quick access. | 180 // Cache the builder in a local variable for quick access. |
| 178 RegExpBuilder* builder = initial_state.builder(); | 181 RegExpBuilder* builder = initial_state.builder(); |
| 179 while (true) { | 182 while (true) { |
| 180 switch (current()) { | 183 switch (current()) { |
| 181 case kEndMarker: | 184 case kEndMarker: |
| 182 if (state->IsSubexpression()) { | 185 if (state->IsSubexpression()) { |
| 183 // Inside a parenthesized group when hitting end of input. | 186 // Inside a parenthesized group when hitting end of input. |
| 184 return ReportError(CStrVector("Unterminated group")); | 187 return ReportError(CStrVector("Unterminated group")); |
| 185 } | 188 } |
| (...skipping 11 matching lines...) Expand all Loading... |
| 197 // regexp atom. | 200 // regexp atom. |
| 198 RegExpTree* body = builder->ToRegExp(); | 201 RegExpTree* body = builder->ToRegExp(); |
| 199 | 202 |
| 200 int end_capture_index = captures_started(); | 203 int end_capture_index = captures_started(); |
| 201 | 204 |
| 202 int capture_index = state->capture_index(); | 205 int capture_index = state->capture_index(); |
| 203 SubexpressionType group_type = state->group_type(); | 206 SubexpressionType group_type = state->group_type(); |
| 204 | 207 |
| 205 // Build result of subexpression. | 208 // Build result of subexpression. |
| 206 if (group_type == CAPTURE) { | 209 if (group_type == CAPTURE) { |
| 210 if (state->IsNamedCapture()) { |
| 211 CreateNamedCaptureAtIndex(state->capture_name(), |
| 212 capture_index CHECK_FAILED); |
| 213 } |
| 207 RegExpCapture* capture = GetCapture(capture_index); | 214 RegExpCapture* capture = GetCapture(capture_index); |
| 208 capture->set_body(body); | 215 capture->set_body(body); |
| 209 body = capture; | 216 body = capture; |
| 210 } else if (group_type != GROUPING) { | 217 } else if (group_type != GROUPING) { |
| 211 DCHECK(group_type == POSITIVE_LOOKAROUND || | 218 DCHECK(group_type == POSITIVE_LOOKAROUND || |
| 212 group_type == NEGATIVE_LOOKAROUND); | 219 group_type == NEGATIVE_LOOKAROUND); |
| 213 bool is_positive = (group_type == POSITIVE_LOOKAROUND); | 220 bool is_positive = (group_type == POSITIVE_LOOKAROUND); |
| 214 body = new (zone()) RegExpLookaround( | 221 body = new (zone()) RegExpLookaround( |
| 215 body, is_positive, end_capture_index - capture_index, | 222 body, is_positive, end_capture_index - capture_index, |
| 216 capture_index, state->lookaround_type()); | 223 capture_index, state->lookaround_type()); |
| (...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 261 new (zone()) ZoneList<CharacterRange>(2, zone()); | 268 new (zone()) ZoneList<CharacterRange>(2, zone()); |
| 262 CharacterRange::AddClassEscape('.', ranges, zone()); | 269 CharacterRange::AddClassEscape('.', ranges, zone()); |
| 263 RegExpCharacterClass* cc = | 270 RegExpCharacterClass* cc = |
| 264 new (zone()) RegExpCharacterClass(ranges, false); | 271 new (zone()) RegExpCharacterClass(ranges, false); |
| 265 builder->AddCharacterClass(cc); | 272 builder->AddCharacterClass(cc); |
| 266 break; | 273 break; |
| 267 } | 274 } |
| 268 case '(': { | 275 case '(': { |
| 269 SubexpressionType subexpr_type = CAPTURE; | 276 SubexpressionType subexpr_type = CAPTURE; |
| 270 RegExpLookaround::Type lookaround_type = state->lookaround_type(); | 277 RegExpLookaround::Type lookaround_type = state->lookaround_type(); |
| 278 bool is_named_capture = false; |
| 271 Advance(); | 279 Advance(); |
| 272 if (current() == '?') { | 280 if (current() == '?') { |
| 273 switch (Next()) { | 281 switch (Next()) { |
| 274 case ':': | 282 case ':': |
| 275 subexpr_type = GROUPING; | 283 subexpr_type = GROUPING; |
| 284 Advance(2); |
| 276 break; | 285 break; |
| 277 case '=': | 286 case '=': |
| 278 lookaround_type = RegExpLookaround::LOOKAHEAD; | 287 lookaround_type = RegExpLookaround::LOOKAHEAD; |
| 279 subexpr_type = POSITIVE_LOOKAROUND; | 288 subexpr_type = POSITIVE_LOOKAROUND; |
| 289 Advance(2); |
| 280 break; | 290 break; |
| 281 case '!': | 291 case '!': |
| 282 lookaround_type = RegExpLookaround::LOOKAHEAD; | 292 lookaround_type = RegExpLookaround::LOOKAHEAD; |
| 283 subexpr_type = NEGATIVE_LOOKAROUND; | 293 subexpr_type = NEGATIVE_LOOKAROUND; |
| 294 Advance(2); |
| 284 break; | 295 break; |
| 285 case '<': | 296 case '<': |
| 297 Advance(); |
| 286 if (FLAG_harmony_regexp_lookbehind) { | 298 if (FLAG_harmony_regexp_lookbehind) { |
| 287 Advance(); | |
| 288 lookaround_type = RegExpLookaround::LOOKBEHIND; | |
| 289 if (Next() == '=') { | 299 if (Next() == '=') { |
| 290 subexpr_type = POSITIVE_LOOKAROUND; | 300 subexpr_type = POSITIVE_LOOKAROUND; |
| 301 lookaround_type = RegExpLookaround::LOOKBEHIND; |
| 302 Advance(2); |
| 291 break; | 303 break; |
| 292 } else if (Next() == '!') { | 304 } else if (Next() == '!') { |
| 293 subexpr_type = NEGATIVE_LOOKAROUND; | 305 subexpr_type = NEGATIVE_LOOKAROUND; |
| 306 lookaround_type = RegExpLookaround::LOOKBEHIND; |
| 307 Advance(2); |
| 294 break; | 308 break; |
| 295 } | 309 } |
| 296 } | 310 } |
| 311 if (FLAG_harmony_regexp_named_captures && unicode()) { |
| 312 is_named_capture = true; |
| 313 Advance(); |
| 314 break; |
| 315 } |
| 297 // Fall through. | 316 // Fall through. |
| 298 default: | 317 default: |
| 299 return ReportError(CStrVector("Invalid group")); | 318 return ReportError(CStrVector("Invalid group")); |
| 300 } | 319 } |
| 301 Advance(2); | 320 } |
| 302 } else { | 321 |
| 322 const ZoneVector<uc16>* capture_name = nullptr; |
| 323 if (subexpr_type == CAPTURE) { |
| 303 if (captures_started_ >= kMaxCaptures) { | 324 if (captures_started_ >= kMaxCaptures) { |
| 304 return ReportError(CStrVector("Too many captures")); | 325 return ReportError(CStrVector("Too many captures")); |
| 305 } | 326 } |
| 306 captures_started_++; | 327 captures_started_++; |
| 328 |
| 329 if (is_named_capture) { |
| 330 capture_name = ParseCaptureGroupName(CHECK_FAILED); |
| 331 } |
| 307 } | 332 } |
| 308 // Store current state and begin new disjunction parsing. | 333 // Store current state and begin new disjunction parsing. |
| 309 state = new (zone()) RegExpParserState( | 334 state = new (zone()) RegExpParserState( |
| 310 state, subexpr_type, lookaround_type, captures_started_, | 335 state, subexpr_type, lookaround_type, captures_started_, |
| 311 ignore_case(), unicode(), zone()); | 336 capture_name, ignore_case(), unicode(), zone()); |
| 312 builder = state->builder(); | 337 builder = state->builder(); |
| 313 continue; | 338 continue; |
| 314 } | 339 } |
| 315 case '[': { | 340 case '[': { |
| 316 RegExpTree* cc = ParseCharacterClass(CHECK_FAILED); | 341 RegExpTree* cc = ParseCharacterClass(CHECK_FAILED); |
| 317 builder->AddCharacterClass(cc->AsCharacterClass()); | 342 builder->AddCharacterClass(cc->AsCharacterClass()); |
| 318 break; | 343 break; |
| 319 } | 344 } |
| 320 // Atom :: | 345 // Atom :: |
| 321 // \ AtomEscape | 346 // \ AtomEscape |
| (...skipping 168 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 490 if (ParseUnicodeEscape(&value)) { | 515 if (ParseUnicodeEscape(&value)) { |
| 491 builder->AddEscapedUnicodeCharacter(value); | 516 builder->AddEscapedUnicodeCharacter(value); |
| 492 } else if (!unicode()) { | 517 } else if (!unicode()) { |
| 493 builder->AddCharacter('u'); | 518 builder->AddCharacter('u'); |
| 494 } else { | 519 } else { |
| 495 // With /u, invalid escapes are not treated as identity escapes. | 520 // With /u, invalid escapes are not treated as identity escapes. |
| 496 return ReportError(CStrVector("Invalid unicode escape")); | 521 return ReportError(CStrVector("Invalid unicode escape")); |
| 497 } | 522 } |
| 498 break; | 523 break; |
| 499 } | 524 } |
| 525 case 'k': |
| 526 if (FLAG_harmony_regexp_named_captures && unicode()) { |
| 527 Advance(2); |
| 528 ParseNamedBackReference(builder, state CHECK_FAILED); |
| 529 break; |
| 530 } |
| 531 // FALLTHROUGH |
| 500 default: | 532 default: |
| 501 Advance(); | 533 Advance(); |
| 502 // With /u, no identity escapes except for syntax characters | 534 // With /u, no identity escapes except for syntax characters |
| 503 // are allowed. Otherwise, all identity escapes are allowed. | 535 // are allowed. Otherwise, all identity escapes are allowed. |
| 504 if (!unicode() || IsSyntaxCharacterOrSlash(current())) { | 536 if (!unicode() || IsSyntaxCharacterOrSlash(current())) { |
| 505 builder->AddCharacter(current()); | 537 builder->AddCharacter(current()); |
| 506 Advance(); | 538 Advance(); |
| 507 } else { | 539 } else { |
| 508 return ReportError(CStrVector("Invalid escape")); | 540 return ReportError(CStrVector("Invalid escape")); |
| 509 } | 541 } |
| (...skipping 158 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 668 } | 700 } |
| 669 if (value > capture_count_) { | 701 if (value > capture_count_) { |
| 670 Reset(start); | 702 Reset(start); |
| 671 return false; | 703 return false; |
| 672 } | 704 } |
| 673 } | 705 } |
| 674 *index_out = value; | 706 *index_out = value; |
| 675 return true; | 707 return true; |
| 676 } | 708 } |
| 677 | 709 |
| 710 static void push_code_unit(ZoneVector<uc16>* v, uint32_t code_unit) { |
| 711 if (code_unit <= unibrow::Utf16::kMaxNonSurrogateCharCode) { |
| 712 v->push_back(code_unit); |
| 713 } else { |
| 714 v->push_back(unibrow::Utf16::LeadSurrogate(code_unit)); |
| 715 v->push_back(unibrow::Utf16::TrailSurrogate(code_unit)); |
| 716 } |
| 717 } |
| 718 |
| 719 const ZoneVector<uc16>* RegExpParser::ParseCaptureGroupName() { |
| 720 DCHECK(FLAG_harmony_regexp_named_captures); |
| 721 DCHECK(unicode()); |
| 722 |
| 723 ZoneVector<uc16>* name = |
| 724 new (zone()->New(sizeof(ZoneVector<uc16>))) ZoneVector<uc16>(zone()); |
| 725 |
| 726 bool at_start = true; |
| 727 while (true) { |
| 728 uc32 c = current(); |
| 729 Advance(); |
| 730 |
| 731 // Convert unicode escapes. |
| 732 if (c == '\\' && current() == 'u') { |
| 733 Advance(); |
| 734 if (!ParseUnicodeEscape(&c)) { |
| 735 ReportError(CStrVector("Invalid Unicode escape sequence")); |
| 736 return nullptr; |
| 737 } |
| 738 } |
| 739 |
| 740 if (at_start) { |
| 741 if (!IdentifierStart::Is(c)) { |
| 742 ReportError(CStrVector("Invalid capture group name")); |
| 743 return nullptr; |
| 744 } |
| 745 push_code_unit(name, c); |
| 746 at_start = false; |
| 747 } else { |
| 748 if (c == '>') { |
| 749 break; |
| 750 } else if (IdentifierPart::Is(c)) { |
| 751 push_code_unit(name, c); |
| 752 } else { |
| 753 ReportError(CStrVector("Invalid capture group name")); |
| 754 return nullptr; |
| 755 } |
| 756 } |
| 757 } |
| 758 |
| 759 return name; |
| 760 } |
| 761 |
| 762 bool RegExpParser::CreateNamedCaptureAtIndex(const ZoneVector<uc16>* name, |
| 763 int index) { |
| 764 DCHECK(FLAG_harmony_regexp_named_captures); |
| 765 DCHECK(unicode()); |
| 766 DCHECK(0 < index && index <= captures_started_); |
| 767 DCHECK_NOT_NULL(name); |
| 768 |
| 769 if (named_captures_ == nullptr) { |
| 770 named_captures_ = new (zone()) ZoneList<RegExpCapture*>(1, zone()); |
| 771 } else { |
| 772 // Check for duplicates and bail if we find any. |
| 773 for (const auto& named_capture : *named_captures_) { |
| 774 if (*named_capture->name() == *name) { |
| 775 ReportError(CStrVector("Duplicate capture group name")); |
| 776 return false; |
| 777 } |
| 778 } |
| 779 } |
| 780 |
| 781 RegExpCapture* capture = GetCapture(index); |
| 782 DCHECK(capture->name() == nullptr); |
| 783 |
| 784 capture->set_name(name); |
| 785 named_captures_->Add(capture, zone()); |
| 786 |
| 787 return true; |
| 788 } |
| 789 |
| 790 bool RegExpParser::ParseNamedBackReference(RegExpBuilder* builder, |
| 791 RegExpParserState* state) { |
| 792 // The parser is assumed to be on the '<' in \k<name>. |
| 793 if (current() != '<') { |
| 794 ReportError(CStrVector("Invalid named reference")); |
| 795 return false; |
| 796 } |
| 797 |
| 798 Advance(); |
| 799 const ZoneVector<uc16>* name = ParseCaptureGroupName(); |
| 800 if (name == nullptr) { |
| 801 return false; |
| 802 } |
| 803 |
| 804 if (state->IsInsideCaptureGroup(name)) { |
| 805 builder->AddEmpty(); |
| 806 } else { |
| 807 RegExpBackReference* atom = new (zone()) RegExpBackReference(); |
| 808 atom->set_name(name); |
| 809 |
| 810 builder->AddAtom(atom); |
| 811 |
| 812 if (named_back_references_ == nullptr) { |
| 813 named_back_references_ = |
| 814 new (zone()) ZoneList<RegExpBackReference*>(1, zone()); |
| 815 } |
| 816 named_back_references_->Add(atom, zone()); |
| 817 } |
| 818 |
| 819 return true; |
| 820 } |
| 821 |
| 822 void RegExpParser::PatchNamedBackReferences() { |
| 823 if (named_back_references_ == nullptr) return; |
| 824 |
| 825 if (named_captures_ == nullptr) { |
| 826 ReportError(CStrVector("Invalid named capture referenced")); |
| 827 return; |
| 828 } |
| 829 |
| 830 // Look up and patch the actual capture for each named back reference. |
| 831 // TODO(jgruber): O(n^2), optimize if necessary. |
| 832 |
| 833 for (int i = 0; i < named_back_references_->length(); i++) { |
| 834 RegExpBackReference* ref = named_back_references_->at(i); |
| 835 |
| 836 int index = -1; |
| 837 for (const auto& capture : *named_captures_) { |
| 838 if (*capture->name() == *ref->name()) { |
| 839 index = capture->index(); |
| 840 break; |
| 841 } |
| 842 } |
| 843 |
| 844 if (index == -1) { |
| 845 ReportError(CStrVector("Invalid named capture referenced")); |
| 846 return; |
| 847 } |
| 848 |
| 849 ref->set_capture(GetCapture(index)); |
| 850 } |
| 851 } |
| 678 | 852 |
| 679 RegExpCapture* RegExpParser::GetCapture(int index) { | 853 RegExpCapture* RegExpParser::GetCapture(int index) { |
| 680 // The index for the capture groups are one-based. Its index in the list is | 854 // The index for the capture groups are one-based. Its index in the list is |
| 681 // zero-based. | 855 // zero-based. |
| 682 int know_captures = | 856 int know_captures = |
| 683 is_scanned_for_captures_ ? capture_count_ : captures_started_; | 857 is_scanned_for_captures_ ? capture_count_ : captures_started_; |
| 684 DCHECK(index <= know_captures); | 858 DCHECK(index <= know_captures); |
| 685 if (captures_ == NULL) { | 859 if (captures_ == NULL) { |
| 686 captures_ = new (zone()) ZoneList<RegExpCapture*>(know_captures, zone()); | 860 captures_ = new (zone()) ZoneList<RegExpCapture*>(know_captures, zone()); |
| 687 } | 861 } |
| 688 while (captures_->length() < know_captures) { | 862 while (captures_->length() < know_captures) { |
| 689 captures_->Add(new (zone()) RegExpCapture(captures_->length() + 1), zone()); | 863 captures_->Add(new (zone()) RegExpCapture(captures_->length() + 1), zone()); |
| 690 } | 864 } |
| 691 return captures_->at(index - 1); | 865 return captures_->at(index - 1); |
| 692 } | 866 } |
| 693 | 867 |
| 868 Handle<FixedArray> RegExpParser::CreateCaptureNameMap() { |
| 869 if (named_captures_ == nullptr || named_captures_->is_empty()) |
| 870 return Handle<FixedArray>(); |
| 871 |
| 872 int len = named_captures_->length() * 2; |
| 873 Handle<FixedArray> array = isolate()->factory()->NewFixedArray(len); |
| 874 |
| 875 for (int i = 0; i < named_captures_->length(); i++) { |
| 876 RegExpCapture* capture = named_captures_->at(i); |
| 877 Vector<const uc16> vector(&(*capture->name())[0], |
| 878 static_cast<int>(capture->name()->size())); |
| 879 MaybeHandle<String> name = |
| 880 isolate()->factory()->NewStringFromTwoByte(vector); |
| 881 array->set(i * 2, *name.ToHandleChecked()); |
| 882 array->set(i * 2 + 1, Smi::FromInt(capture->index())); |
| 883 } |
| 884 |
| 885 return array; |
| 886 } |
| 694 | 887 |
| 695 bool RegExpParser::RegExpParserState::IsInsideCaptureGroup(int index) { | 888 bool RegExpParser::RegExpParserState::IsInsideCaptureGroup(int index) { |
| 696 for (RegExpParserState* s = this; s != NULL; s = s->previous_state()) { | 889 for (RegExpParserState* s = this; s != NULL; s = s->previous_state()) { |
| 697 if (s->group_type() != CAPTURE) continue; | 890 if (s->group_type() != CAPTURE) continue; |
| 698 // Return true if we found the matching capture index. | 891 // Return true if we found the matching capture index. |
| 699 if (index == s->capture_index()) return true; | 892 if (index == s->capture_index()) return true; |
| 700 // Abort if index is larger than what has been parsed up till this state. | 893 // Abort if index is larger than what has been parsed up till this state. |
| 701 if (index > s->capture_index()) return false; | 894 if (index > s->capture_index()) return false; |
| 702 } | 895 } |
| 703 return false; | 896 return false; |
| 704 } | 897 } |
| 705 | 898 |
| 899 bool RegExpParser::RegExpParserState::IsInsideCaptureGroup( |
| 900 const ZoneVector<uc16>* name) { |
| 901 DCHECK_NOT_NULL(name); |
| 902 for (RegExpParserState* s = this; s != NULL; s = s->previous_state()) { |
| 903 if (s->capture_name() == nullptr) continue; |
| 904 if (*s->capture_name() == *name) return true; |
| 905 } |
| 906 return false; |
| 907 } |
| 706 | 908 |
| 707 // QuantifierPrefix :: | 909 // QuantifierPrefix :: |
| 708 // { DecimalDigits } | 910 // { DecimalDigits } |
| 709 // { DecimalDigits , } | 911 // { DecimalDigits , } |
| 710 // { DecimalDigits , DecimalDigits } | 912 // { DecimalDigits , DecimalDigits } |
| 711 // | 913 // |
| 712 // Returns true if parsing succeeds, and set the min_out and max_out | 914 // Returns true if parsing succeeds, and set the min_out and max_out |
| 713 // values. Values are truncated to RegExpTree::kInfinity if they overflow. | 915 // values. Values are truncated to RegExpTree::kInfinity if they overflow. |
| 714 bool RegExpParser::ParseIntervalQuantifier(int* min_out, int* max_out) { | 916 bool RegExpParser::ParseIntervalQuantifier(int* min_out, int* max_out) { |
| 715 DCHECK_EQ(current(), '{'); | 917 DCHECK_EQ(current(), '{'); |
| (...skipping 412 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1128 default: | 1330 default: |
| 1129 first = ParseClassCharacterEscape(CHECK_FAILED); | 1331 first = ParseClassCharacterEscape(CHECK_FAILED); |
| 1130 } | 1332 } |
| 1131 } else { | 1333 } else { |
| 1132 Advance(); | 1334 Advance(); |
| 1133 } | 1335 } |
| 1134 | 1336 |
| 1135 return CharacterRange::Singleton(first); | 1337 return CharacterRange::Singleton(first); |
| 1136 } | 1338 } |
| 1137 | 1339 |
| 1138 | |
| 1139 static const uc16 kNoCharClass = 0; | 1340 static const uc16 kNoCharClass = 0; |
| 1140 | 1341 |
| 1141 // Adds range or pre-defined character class to character ranges. | 1342 // Adds range or pre-defined character class to character ranges. |
| 1142 // If char_class is not kInvalidClass, it's interpreted as a class | 1343 // If char_class is not kInvalidClass, it's interpreted as a class |
| 1143 // escape (i.e., 's' means whitespace, from '\s'). | 1344 // escape (i.e., 's' means whitespace, from '\s'). |
| 1144 static inline void AddRangeOrEscape(ZoneList<CharacterRange>* ranges, | 1345 static inline void AddRangeOrEscape(ZoneList<CharacterRange>* ranges, |
| 1145 uc16 char_class, CharacterRange range, | 1346 uc16 char_class, CharacterRange range, |
| 1146 Zone* zone) { | 1347 Zone* zone) { |
| 1147 if (char_class != kNoCharClass) { | 1348 if (char_class != kNoCharClass) { |
| 1148 CharacterRange::AddClassEscape(char_class, ranges, zone); | 1349 CharacterRange::AddClassEscape(char_class, ranges, zone); |
| (...skipping 112 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1261 DCHECK(result->error.is_null()); | 1462 DCHECK(result->error.is_null()); |
| 1262 if (FLAG_trace_regexp_parser) { | 1463 if (FLAG_trace_regexp_parser) { |
| 1263 OFStream os(stdout); | 1464 OFStream os(stdout); |
| 1264 tree->Print(os, zone); | 1465 tree->Print(os, zone); |
| 1265 os << "\n"; | 1466 os << "\n"; |
| 1266 } | 1467 } |
| 1267 result->tree = tree; | 1468 result->tree = tree; |
| 1268 int capture_count = parser.captures_started(); | 1469 int capture_count = parser.captures_started(); |
| 1269 result->simple = tree->IsAtom() && parser.simple() && capture_count == 0; | 1470 result->simple = tree->IsAtom() && parser.simple() && capture_count == 0; |
| 1270 result->contains_anchor = parser.contains_anchor(); | 1471 result->contains_anchor = parser.contains_anchor(); |
| 1472 result->capture_name_map = parser.CreateCaptureNameMap(); |
| 1271 result->capture_count = capture_count; | 1473 result->capture_count = capture_count; |
| 1272 } | 1474 } |
| 1273 return !parser.failed(); | 1475 return !parser.failed(); |
| 1274 } | 1476 } |
| 1275 | 1477 |
| 1276 RegExpBuilder::RegExpBuilder(Zone* zone, bool ignore_case, bool unicode) | 1478 RegExpBuilder::RegExpBuilder(Zone* zone, bool ignore_case, bool unicode) |
| 1277 : zone_(zone), | 1479 : zone_(zone), |
| 1278 pending_empty_(false), | 1480 pending_empty_(false), |
| 1279 ignore_case_(ignore_case), | 1481 ignore_case_(ignore_case), |
| 1280 unicode_(unicode), | 1482 unicode_(unicode), |
| (...skipping 276 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1557 return false; | 1759 return false; |
| 1558 } | 1760 } |
| 1559 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), | 1761 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), |
| 1560 zone()); | 1762 zone()); |
| 1561 LAST(ADD_TERM); | 1763 LAST(ADD_TERM); |
| 1562 return true; | 1764 return true; |
| 1563 } | 1765 } |
| 1564 | 1766 |
| 1565 } // namespace internal | 1767 } // namespace internal |
| 1566 } // namespace v8 | 1768 } // namespace v8 |
| OLD | NEW |