Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2016 the V8 project authors. All rights reserved. | 1 // Copyright 2016 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "src/regexp/regexp-parser.h" | 5 #include "src/regexp/regexp-parser.h" |
| 6 | 6 |
| 7 #include "src/char-predicates-inl.h" | 7 #include "src/char-predicates-inl.h" |
| 8 #include "src/factory.h" | 8 #include "src/factory.h" |
| 9 #include "src/isolate.h" | 9 #include "src/isolate.h" |
| 10 #include "src/objects-inl.h" | 10 #include "src/objects-inl.h" |
| 11 #include "src/ostreams.h" | 11 #include "src/ostreams.h" |
| 12 #include "src/regexp/jsregexp.h" | 12 #include "src/regexp/jsregexp.h" |
| 13 #include "src/utils.h" | 13 #include "src/utils.h" |
| 14 | 14 |
| 15 #ifdef V8_I18N_SUPPORT | 15 #ifdef V8_I18N_SUPPORT |
| 16 #include "unicode/uset.h" | 16 #include "unicode/uset.h" |
| 17 #endif // V8_I18N_SUPPORT | 17 #endif // V8_I18N_SUPPORT |
| 18 | 18 |
| 19 namespace v8 { | 19 namespace v8 { |
| 20 namespace internal { | 20 namespace internal { |
| 21 | 21 |
| 22 RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error, | 22 RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error, |
| 23 JSRegExp::Flags flags, Isolate* isolate, Zone* zone) | 23 JSRegExp::Flags flags, Isolate* isolate, Zone* zone) |
| 24 : isolate_(isolate), | 24 : isolate_(isolate), |
| 25 zone_(zone), | 25 zone_(zone), |
| 26 error_(error), | 26 error_(error), |
| 27 captures_(NULL), | 27 captures_(NULL), |
| 28 named_captures_(NULL), | |
| 29 named_back_references_(NULL), | |
| 30 capture_strings_(0, zone), | |
| 28 in_(in), | 31 in_(in), |
| 29 current_(kEndMarker), | 32 current_(kEndMarker), |
| 30 ignore_case_(flags & JSRegExp::kIgnoreCase), | 33 ignore_case_(flags & JSRegExp::kIgnoreCase), |
| 31 multiline_(flags & JSRegExp::kMultiline), | 34 multiline_(flags & JSRegExp::kMultiline), |
| 32 unicode_(flags & JSRegExp::kUnicode), | 35 unicode_(flags & JSRegExp::kUnicode), |
| 33 next_pos_(0), | 36 next_pos_(0), |
| 34 captures_started_(0), | 37 captures_started_(0), |
| 35 capture_count_(0), | 38 capture_count_(0), |
| 36 has_more_(true), | 39 has_more_(true), |
| 37 simple_(false), | 40 simple_(false), |
| (...skipping 104 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 142 | 145 |
| 143 #define CHECK_FAILED /**/); \ | 146 #define CHECK_FAILED /**/); \ |
| 144 if (failed_) return NULL; \ | 147 if (failed_) return NULL; \ |
| 145 ((void)0 | 148 ((void)0 |
| 146 | 149 |
| 147 | 150 |
| 148 // Pattern :: | 151 // Pattern :: |
| 149 // Disjunction | 152 // Disjunction |
| 150 RegExpTree* RegExpParser::ParsePattern() { | 153 RegExpTree* RegExpParser::ParsePattern() { |
| 151 RegExpTree* result = ParseDisjunction(CHECK_FAILED); | 154 RegExpTree* result = ParseDisjunction(CHECK_FAILED); |
| 155 PatchNamedBackReferences(CHECK_FAILED); | |
| 152 DCHECK(!has_more()); | 156 DCHECK(!has_more()); |
| 153 // If the result of parsing is a literal string atom, and it has the | 157 // If the result of parsing is a literal string atom, and it has the |
| 154 // same length as the input, then the atom is identical to the input. | 158 // same length as the input, then the atom is identical to the input. |
| 155 if (result->IsAtom() && result->AsAtom()->length() == in()->length()) { | 159 if (result->IsAtom() && result->AsAtom()->length() == in()->length()) { |
| 156 simple_ = true; | 160 simple_ = true; |
| 157 } | 161 } |
| 158 return result; | 162 return result; |
| 159 } | 163 } |
| 160 | 164 |
| 161 | 165 |
| (...skipping 99 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 261 new (zone()) ZoneList<CharacterRange>(2, zone()); | 265 new (zone()) ZoneList<CharacterRange>(2, zone()); |
| 262 CharacterRange::AddClassEscape('.', ranges, zone()); | 266 CharacterRange::AddClassEscape('.', ranges, zone()); |
| 263 RegExpCharacterClass* cc = | 267 RegExpCharacterClass* cc = |
| 264 new (zone()) RegExpCharacterClass(ranges, false); | 268 new (zone()) RegExpCharacterClass(ranges, false); |
| 265 builder->AddCharacterClass(cc); | 269 builder->AddCharacterClass(cc); |
| 266 break; | 270 break; |
| 267 } | 271 } |
| 268 case '(': { | 272 case '(': { |
| 269 SubexpressionType subexpr_type = CAPTURE; | 273 SubexpressionType subexpr_type = CAPTURE; |
| 270 RegExpLookaround::Type lookaround_type = state->lookaround_type(); | 274 RegExpLookaround::Type lookaround_type = state->lookaround_type(); |
| 275 bool is_named_capture = false; | |
| 271 Advance(); | 276 Advance(); |
| 272 if (current() == '?') { | 277 if (current() == '?') { |
| 273 switch (Next()) { | 278 switch (Next()) { |
| 274 case ':': | 279 case ':': |
| 275 subexpr_type = GROUPING; | 280 subexpr_type = GROUPING; |
| 281 Advance(2); | |
| 276 break; | 282 break; |
| 277 case '=': | 283 case '=': |
| 278 lookaround_type = RegExpLookaround::LOOKAHEAD; | 284 lookaround_type = RegExpLookaround::LOOKAHEAD; |
| 279 subexpr_type = POSITIVE_LOOKAROUND; | 285 subexpr_type = POSITIVE_LOOKAROUND; |
| 286 Advance(2); | |
| 280 break; | 287 break; |
| 281 case '!': | 288 case '!': |
| 282 lookaround_type = RegExpLookaround::LOOKAHEAD; | 289 lookaround_type = RegExpLookaround::LOOKAHEAD; |
| 283 subexpr_type = NEGATIVE_LOOKAROUND; | 290 subexpr_type = NEGATIVE_LOOKAROUND; |
| 291 Advance(2); | |
| 284 break; | 292 break; |
| 285 case '<': | 293 case '<': |
| 286 if (FLAG_harmony_regexp_lookbehind) { | 294 if (FLAG_harmony_regexp_lookbehind || |
| 295 FLAG_harmony_regexp_named_captures) { | |
|
Yang
2016/06/13 10:54:52
I don't think this check is still necessary. We ca
jgruber
2016/06/13 13:10:00
Done.
| |
| 287 Advance(); | 296 Advance(); |
| 288 lookaround_type = RegExpLookaround::LOOKBEHIND; | 297 if (FLAG_harmony_regexp_lookbehind) { |
| 289 if (Next() == '=') { | 298 if (Next() == '=') { |
| 290 subexpr_type = POSITIVE_LOOKAROUND; | 299 subexpr_type = POSITIVE_LOOKAROUND; |
| 291 break; | 300 lookaround_type = RegExpLookaround::LOOKBEHIND; |
| 292 } else if (Next() == '!') { | 301 Advance(2); |
| 293 subexpr_type = NEGATIVE_LOOKAROUND; | 302 break; |
| 303 } else if (Next() == '!') { | |
| 304 subexpr_type = NEGATIVE_LOOKAROUND; | |
| 305 lookaround_type = RegExpLookaround::LOOKBEHIND; | |
| 306 Advance(2); | |
| 307 break; | |
| 308 } | |
| 309 } | |
| 310 if (FLAG_harmony_regexp_named_captures && unicode()) { | |
| 311 is_named_capture = true; | |
| 312 Advance(); | |
| 294 break; | 313 break; |
| 295 } | 314 } |
| 296 } | 315 } |
| 297 // Fall through. | 316 // Fall through. |
| 298 default: | 317 default: |
| 299 return ReportError(CStrVector("Invalid group")); | 318 return ReportError(CStrVector("Invalid group")); |
| 300 } | 319 } |
| 301 Advance(2); | 320 } |
| 302 } else { | 321 |
| 322 if (subexpr_type == CAPTURE) { | |
| 303 if (captures_started_ >= kMaxCaptures) { | 323 if (captures_started_ >= kMaxCaptures) { |
| 304 return ReportError(CStrVector("Too many captures")); | 324 return ReportError(CStrVector("Too many captures")); |
| 305 } | 325 } |
| 306 captures_started_++; | 326 captures_started_++; |
| 327 | |
| 328 if (is_named_capture) { | |
| 329 const ZoneVector<uc16>* name = ParseCaptureGroupName(CHECK_FAILED); | |
| 330 CreateNamedCaptureAtIndex(name, captures_started_ CHECK_FAILED); | |
|
Yang
2016/06/13 10:54:52
Can we simply attach the name to the parser state
jgruber
2016/06/13 13:10:00
Done.
| |
| 331 } | |
| 307 } | 332 } |
| 308 // Store current state and begin new disjunction parsing. | 333 // Store current state and begin new disjunction parsing. |
| 309 state = new (zone()) RegExpParserState( | 334 state = new (zone()) RegExpParserState( |
| 310 state, subexpr_type, lookaround_type, captures_started_, | 335 state, subexpr_type, lookaround_type, captures_started_, |
| 311 ignore_case(), unicode(), zone()); | 336 ignore_case(), unicode(), zone()); |
| 312 builder = state->builder(); | 337 builder = state->builder(); |
| 313 continue; | 338 continue; |
| 314 } | 339 } |
| 315 case '[': { | 340 case '[': { |
| 316 RegExpTree* cc = ParseCharacterClass(CHECK_FAILED); | 341 RegExpTree* cc = ParseCharacterClass(CHECK_FAILED); |
| (...skipping 173 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 490 if (ParseUnicodeEscape(&value)) { | 515 if (ParseUnicodeEscape(&value)) { |
| 491 builder->AddEscapedUnicodeCharacter(value); | 516 builder->AddEscapedUnicodeCharacter(value); |
| 492 } else if (!unicode()) { | 517 } else if (!unicode()) { |
| 493 builder->AddCharacter('u'); | 518 builder->AddCharacter('u'); |
| 494 } else { | 519 } else { |
| 495 // With /u, invalid escapes are not treated as identity escapes. | 520 // With /u, invalid escapes are not treated as identity escapes. |
| 496 return ReportError(CStrVector("Invalid unicode escape")); | 521 return ReportError(CStrVector("Invalid unicode escape")); |
| 497 } | 522 } |
| 498 break; | 523 break; |
| 499 } | 524 } |
| 525 case 'k': | |
| 526 if (FLAG_harmony_regexp_named_captures && unicode()) { | |
| 527 Advance(2); | |
| 528 ParseNamedBackReference(builder, state CHECK_FAILED); | |
| 529 break; | |
| 530 } | |
| 531 // FALLTHROUGH | |
|
Yang
2016/06/13 10:54:53
I don't think we need all caps here :)
Above we ha
jgruber
2016/06/13 13:10:00
Looking at the rest of the file, I think we have e
| |
| 500 default: | 532 default: |
| 501 Advance(); | 533 Advance(); |
| 502 // With /u, no identity escapes except for syntax characters | 534 // With /u, no identity escapes except for syntax characters |
| 503 // are allowed. Otherwise, all identity escapes are allowed. | 535 // are allowed. Otherwise, all identity escapes are allowed. |
| 504 if (!unicode() || IsSyntaxCharacterOrSlash(current())) { | 536 if (!unicode() || IsSyntaxCharacterOrSlash(current())) { |
| 505 builder->AddCharacter(current()); | 537 builder->AddCharacter(current()); |
| 506 Advance(); | 538 Advance(); |
| 507 } else { | 539 } else { |
| 508 return ReportError(CStrVector("Invalid escape")); | 540 return ReportError(CStrVector("Invalid escape")); |
| 509 } | 541 } |
| (...skipping 158 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 668 } | 700 } |
| 669 if (value > capture_count_) { | 701 if (value > capture_count_) { |
| 670 Reset(start); | 702 Reset(start); |
| 671 return false; | 703 return false; |
| 672 } | 704 } |
| 673 } | 705 } |
| 674 *index_out = value; | 706 *index_out = value; |
| 675 return true; | 707 return true; |
| 676 } | 708 } |
| 677 | 709 |
| 710 class CaptureNameBuffer { | |
| 711 public: | |
| 712 explicit CaptureNameBuffer(Zone* zone) | |
| 713 : backing_store_(nullptr), zone_(zone) {} | |
| 714 | |
| 715 INLINE(void AddChar(uint32_t code_unit)) { | |
| 716 if (backing_store_ == nullptr) { | |
| 717 backing_store_ = | |
| 718 new (zone_->New(sizeof(ZoneVector<uc16>))) ZoneVector<uc16>(zone_); | |
| 719 } | |
| 720 if (code_unit <= unibrow::Utf16::kMaxNonSurrogateCharCode) { | |
| 721 backing_store_->push_back(code_unit); | |
| 722 } else { | |
| 723 backing_store_->push_back(unibrow::Utf16::LeadSurrogate(code_unit)); | |
| 724 backing_store_->push_back(unibrow::Utf16::TrailSurrogate(code_unit)); | |
| 725 } | |
| 726 } | |
| 727 | |
| 728 const ZoneVector<uc16>* two_byte_literal() { return backing_store_; } | |
| 729 | |
| 730 private: | |
| 731 ZoneVector<uc16>* backing_store_; | |
|
Yang
2016/06/13 10:54:52
Let's make this a non-dynamic member, like Bytecod
jgruber
2016/06/13 13:10:00
Done.
| |
| 732 Zone* zone_; | |
| 733 | |
| 734 DISALLOW_COPY_AND_ASSIGN(CaptureNameBuffer); | |
| 735 }; | |
| 736 | |
| 737 const ZoneVector<uc16>* RegExpParser::ParseCaptureGroupName() { | |
| 738 DCHECK(FLAG_harmony_regexp_named_captures); | |
| 739 DCHECK(unicode()); | |
| 740 | |
| 741 CaptureNameBuffer buf(zone()); | |
| 742 bool at_start = true; | |
| 743 while (true) { | |
| 744 uc32 c = current(); | |
| 745 Advance(); | |
| 746 | |
| 747 // Convert unicode escapes. | |
| 748 if (c == '\\' && current() == 'u') { | |
| 749 Advance(); | |
| 750 if (!ParseUnicodeEscape(&c)) { | |
| 751 ReportError(CStrVector("Invalid Unicode escape sequence")); | |
| 752 return nullptr; | |
| 753 } | |
| 754 } | |
| 755 | |
| 756 if (at_start) { | |
| 757 if (!IdentifierStart::Is(c)) { | |
| 758 ReportError(CStrVector("Invalid capture group name")); | |
| 759 return nullptr; | |
| 760 } | |
| 761 buf.AddChar(c); | |
| 762 at_start = false; | |
| 763 } else { | |
| 764 if (c == '>') { | |
| 765 break; | |
| 766 } else if (IdentifierPart::Is(c)) { | |
| 767 buf.AddChar(c); | |
| 768 } else { | |
| 769 ReportError(CStrVector("Invalid capture group name")); | |
| 770 return nullptr; | |
| 771 } | |
| 772 } | |
| 773 } | |
| 774 | |
| 775 return buf.two_byte_literal(); | |
| 776 } | |
| 777 | |
| 778 bool RegExpParser::CreateNamedCaptureAtIndex(const ZoneVector<uc16>* name, | |
| 779 int index) { | |
| 780 DCHECK(FLAG_harmony_regexp_named_captures); | |
| 781 DCHECK(unicode()); | |
| 782 DCHECK(0 < index && index <= captures_started_); | |
| 783 DCHECK_NOT_NULL(name); | |
| 784 | |
| 785 if (named_captures_ == nullptr) { | |
| 786 named_captures_ = new (zone()) ZoneList<RegExpCapture*>(1, zone()); | |
|
Yang
2016/06/13 10:54:52
Let's make named_captures_ a non-dynamic member of
jgruber
2016/06/13 13:10:00
Do you have an intuition about how much overhead i
Yang
2016/06/13 13:38:00
Not a lot. List takes 3 pointers, so dynamic alloc
jgruber
2016/06/14 07:53:12
Ok. I'll stick with dynamic lists for now, just to
| |
| 787 } else { | |
| 788 // Check for duplicates and bail if we find any. | |
| 789 for (int i = 0; i < named_captures_->length(); i++) { | |
|
Yang
2016/06/13 10:54:52
You can use C++11 syntax here.
for (const auto& n
jgruber
2016/06/13 13:10:00
Done.
| |
| 790 if (*named_captures_->at(i)->name() == *name) { | |
| 791 ReportError(CStrVector("Duplicate capture group name")); | |
| 792 return false; | |
| 793 } | |
| 794 } | |
| 795 } | |
| 796 | |
| 797 RegExpCapture* capture = GetCapture(index); | |
| 798 DCHECK(capture->name() == nullptr); | |
| 799 | |
| 800 capture->set_name(name); | |
| 801 named_captures_->Add(capture, zone()); | |
| 802 | |
| 803 return true; | |
| 804 } | |
| 805 | |
| 806 bool RegExpParser::ParseNamedBackReference(RegExpBuilder* builder, | |
| 807 RegExpParserState* state) { | |
| 808 // The parser is assumed to be on the '<' in \k<name>. | |
| 809 if (current() != '<') { | |
| 810 ReportError(CStrVector("Invalid named reference")); | |
| 811 return false; | |
| 812 } | |
| 813 | |
| 814 Advance(); | |
| 815 const ZoneVector<uc16>* name = ParseCaptureGroupName(); | |
| 816 if (name == nullptr) { | |
| 817 return false; | |
| 818 } | |
| 819 | |
| 820 const int index = LookupCaptureGroupIndex(name); | |
|
Yang
2016/06/13 10:54:53
Let's not do this twice, here and in PatchNamedBac
jgruber
2016/06/13 13:10:00
We needed the index here to determine whether to c
| |
| 821 if (index != -1 && state->IsInsideCaptureGroup(index)) { | |
| 822 builder->AddEmpty(); | |
| 823 } else { | |
| 824 RegExpBackReference* atom = new (zone()) RegExpBackReference(); | |
| 825 atom->set_name(name); | |
| 826 | |
| 827 builder->AddAtom(atom); | |
| 828 | |
| 829 if (named_back_references_ == nullptr) { | |
| 830 named_back_references_ = | |
|
Yang
2016/06/13 10:54:53
Same here, let's make named_back_references_ a non
jgruber
2016/06/13 13:10:00
See above.
This change is trivial, just want to m
| |
| 831 new (zone()) ZoneList<RegExpBackReference*>(1, zone()); | |
| 832 } | |
| 833 named_back_references_->Add(atom, zone()); | |
| 834 } | |
| 835 | |
| 836 return true; | |
| 837 } | |
| 838 | |
| 839 void RegExpParser::PatchNamedBackReferences() { | |
| 840 if (named_back_references_ == nullptr) return; | |
| 841 | |
| 842 if (named_captures_ == nullptr) { | |
| 843 ReportError(CStrVector("Invalid named capture referenced")); | |
| 844 return; | |
| 845 } | |
| 846 | |
| 847 // Look up and patch the actual capture for each named back reference. | |
| 848 // TODO(jgruber): O(n^2), optimize if necessary. | |
| 849 | |
| 850 for (int i = 0; i < named_back_references_->length(); i++) { | |
| 851 RegExpBackReference* ref = named_back_references_->at(i); | |
| 852 int index = LookupCaptureGroupIndex(ref->name()); | |
| 853 if (index == -1) { | |
| 854 ReportError(CStrVector("Invalid named capture referenced")); | |
| 855 return; | |
| 856 } | |
| 857 ref->set_capture(GetCapture(index)); | |
| 858 } | |
| 859 } | |
| 860 | |
| 861 int RegExpParser::LookupCaptureGroupIndex(const ZoneVector<uc16>* name) { | |
|
Yang
2016/06/13 10:54:52
This can be inlined into PatchNamedBackReferences
jgruber
2016/06/13 13:10:00
Done.
| |
| 862 DCHECK(FLAG_harmony_regexp_named_captures); | |
| 863 DCHECK(unicode()); | |
| 864 DCHECK_NOT_NULL(name); | |
| 865 | |
| 866 // Attempt an initial lookup. | |
| 867 if (named_captures_ == nullptr) { | |
| 868 return -1; | |
| 869 } | |
| 870 | |
| 871 for (int i = 0; i < named_captures_->length(); i++) { | |
| 872 RegExpCapture* capture = named_captures_->at(i); | |
| 873 if (*capture->name() == *name) { | |
| 874 return capture->index(); | |
| 875 } | |
| 876 } | |
| 877 | |
| 878 return -1; | |
| 879 } | |
| 678 | 880 |
| 679 RegExpCapture* RegExpParser::GetCapture(int index) { | 881 RegExpCapture* RegExpParser::GetCapture(int index) { |
| 680 // The index for the capture groups are one-based. Its index in the list is | 882 // The index for the capture groups are one-based. Its index in the list is |
| 681 // zero-based. | 883 // zero-based. |
| 682 int know_captures = | 884 int know_captures = |
| 683 is_scanned_for_captures_ ? capture_count_ : captures_started_; | 885 is_scanned_for_captures_ ? capture_count_ : captures_started_; |
| 684 DCHECK(index <= know_captures); | 886 DCHECK(index <= know_captures); |
| 685 if (captures_ == NULL) { | 887 if (captures_ == NULL) { |
| 686 captures_ = new (zone()) ZoneList<RegExpCapture*>(know_captures, zone()); | 888 captures_ = new (zone()) ZoneList<RegExpCapture*>(know_captures, zone()); |
| 687 } | 889 } |
| 688 while (captures_->length() < know_captures) { | 890 while (captures_->length() < know_captures) { |
| 689 captures_->Add(new (zone()) RegExpCapture(captures_->length() + 1), zone()); | 891 captures_->Add(new (zone()) RegExpCapture(captures_->length() + 1), zone()); |
| 690 } | 892 } |
| 691 return captures_->at(index - 1); | 893 return captures_->at(index - 1); |
| 692 } | 894 } |
| 693 | 895 |
| 896 Handle<FixedArray> RegExpParser::CreateCaptureNameMap() { | |
| 897 if (named_captures_ == nullptr || named_captures_->is_empty()) | |
| 898 return Handle<FixedArray>(); | |
| 899 | |
| 900 int len = named_captures_->length() * 2; | |
| 901 Handle<FixedArray> array = isolate()->factory()->NewFixedArray(len); | |
| 902 | |
| 903 for (int i = 0; i < named_captures_->length(); i++) { | |
| 904 RegExpCapture* capture = named_captures_->at(i); | |
| 905 Vector<const uc16> vector(&(*capture->name())[0], | |
|
Yang
2016/06/13 10:54:53
Could we use a ZoneList for capture->name() instea
jgruber
2016/06/13 13:10:00
I used ZoneVector because of mstarzinger's comment
Yang
2016/06/13 13:38:00
I guess adding ToConstVector to ZoneVector also wo
jgruber
2016/06/14 07:53:12
Done.
| |
| 906 static_cast<int>(capture->name()->size())); | |
| 907 MaybeHandle<String> name = | |
| 908 isolate()->factory()->NewStringFromTwoByte(vector); | |
| 909 array->set(i * 2, *name.ToHandleChecked()); | |
| 910 array->set(i * 2 + 1, Smi::FromInt(capture->index())); | |
| 911 } | |
| 912 | |
| 913 return array; | |
| 914 } | |
| 915 | |
| 916 void RegExpParser::FreeCaptureStrings() { | |
|
Yang
2016/06/13 10:54:52
Do we still need this and capture_strings_?
jgruber
2016/06/13 13:10:00
No. Thanks, good catch.
| |
| 917 for (int i = 0; i < capture_strings_.length(); i++) { | |
| 918 capture_strings_[i].Dispose(); | |
| 919 } | |
| 920 capture_strings_.Clear(); | |
| 921 } | |
| 694 | 922 |
| 695 bool RegExpParser::RegExpParserState::IsInsideCaptureGroup(int index) { | 923 bool RegExpParser::RegExpParserState::IsInsideCaptureGroup(int index) { |
| 696 for (RegExpParserState* s = this; s != NULL; s = s->previous_state()) { | 924 for (RegExpParserState* s = this; s != NULL; s = s->previous_state()) { |
| 697 if (s->group_type() != CAPTURE) continue; | 925 if (s->group_type() != CAPTURE) continue; |
| 698 // Return true if we found the matching capture index. | 926 // Return true if we found the matching capture index. |
| 699 if (index == s->capture_index()) return true; | 927 if (index == s->capture_index()) return true; |
| 700 // Abort if index is larger than what has been parsed up till this state. | 928 // Abort if index is larger than what has been parsed up till this state. |
| 701 if (index > s->capture_index()) return false; | 929 if (index > s->capture_index()) return false; |
| 702 } | 930 } |
| 703 return false; | 931 return false; |
| (...skipping 424 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1128 default: | 1356 default: |
| 1129 first = ParseClassCharacterEscape(CHECK_FAILED); | 1357 first = ParseClassCharacterEscape(CHECK_FAILED); |
| 1130 } | 1358 } |
| 1131 } else { | 1359 } else { |
| 1132 Advance(); | 1360 Advance(); |
| 1133 } | 1361 } |
| 1134 | 1362 |
| 1135 return CharacterRange::Singleton(first); | 1363 return CharacterRange::Singleton(first); |
| 1136 } | 1364 } |
| 1137 | 1365 |
| 1138 | |
| 1139 static const uc16 kNoCharClass = 0; | 1366 static const uc16 kNoCharClass = 0; |
| 1140 | 1367 |
| 1141 // Adds range or pre-defined character class to character ranges. | 1368 // Adds range or pre-defined character class to character ranges. |
| 1142 // If char_class is not kInvalidClass, it's interpreted as a class | 1369 // If char_class is not kInvalidClass, it's interpreted as a class |
| 1143 // escape (i.e., 's' means whitespace, from '\s'). | 1370 // escape (i.e., 's' means whitespace, from '\s'). |
| 1144 static inline void AddRangeOrEscape(ZoneList<CharacterRange>* ranges, | 1371 static inline void AddRangeOrEscape(ZoneList<CharacterRange>* ranges, |
| 1145 uc16 char_class, CharacterRange range, | 1372 uc16 char_class, CharacterRange range, |
| 1146 Zone* zone) { | 1373 Zone* zone) { |
| 1147 if (char_class != kNoCharClass) { | 1374 if (char_class != kNoCharClass) { |
| 1148 CharacterRange::AddClassEscape(char_class, ranges, zone); | 1375 CharacterRange::AddClassEscape(char_class, ranges, zone); |
| (...skipping 112 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1261 DCHECK(result->error.is_null()); | 1488 DCHECK(result->error.is_null()); |
| 1262 if (FLAG_trace_regexp_parser) { | 1489 if (FLAG_trace_regexp_parser) { |
| 1263 OFStream os(stdout); | 1490 OFStream os(stdout); |
| 1264 tree->Print(os, zone); | 1491 tree->Print(os, zone); |
| 1265 os << "\n"; | 1492 os << "\n"; |
| 1266 } | 1493 } |
| 1267 result->tree = tree; | 1494 result->tree = tree; |
| 1268 int capture_count = parser.captures_started(); | 1495 int capture_count = parser.captures_started(); |
| 1269 result->simple = tree->IsAtom() && parser.simple() && capture_count == 0; | 1496 result->simple = tree->IsAtom() && parser.simple() && capture_count == 0; |
| 1270 result->contains_anchor = parser.contains_anchor(); | 1497 result->contains_anchor = parser.contains_anchor(); |
| 1498 result->capture_name_map = parser.CreateCaptureNameMap(); | |
| 1271 result->capture_count = capture_count; | 1499 result->capture_count = capture_count; |
| 1272 } | 1500 } |
| 1501 parser.FreeCaptureStrings(); | |
| 1273 return !parser.failed(); | 1502 return !parser.failed(); |
| 1274 } | 1503 } |
| 1275 | 1504 |
| 1276 RegExpBuilder::RegExpBuilder(Zone* zone, bool ignore_case, bool unicode) | 1505 RegExpBuilder::RegExpBuilder(Zone* zone, bool ignore_case, bool unicode) |
| 1277 : zone_(zone), | 1506 : zone_(zone), |
| 1278 pending_empty_(false), | 1507 pending_empty_(false), |
| 1279 ignore_case_(ignore_case), | 1508 ignore_case_(ignore_case), |
| 1280 unicode_(unicode), | 1509 unicode_(unicode), |
| 1281 characters_(NULL), | 1510 characters_(NULL), |
| 1282 pending_surrogate_(kNoPendingSurrogate), | 1511 pending_surrogate_(kNoPendingSurrogate), |
| (...skipping 274 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1557 return false; | 1786 return false; |
| 1558 } | 1787 } |
| 1559 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), | 1788 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), |
| 1560 zone()); | 1789 zone()); |
| 1561 LAST(ADD_TERM); | 1790 LAST(ADD_TERM); |
| 1562 return true; | 1791 return true; |
| 1563 } | 1792 } |
| 1564 | 1793 |
| 1565 } // namespace internal | 1794 } // namespace internal |
| 1566 } // namespace v8 | 1795 } // namespace v8 |
| OLD | NEW |