OLD | NEW |
---|---|
1 // Copyright 2016 the V8 project authors. All rights reserved. | 1 // Copyright 2016 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "src/regexp/regexp-parser.h" | 5 #include "src/regexp/regexp-parser.h" |
6 | 6 |
7 #include "src/char-predicates-inl.h" | 7 #include "src/char-predicates-inl.h" |
8 #include "src/factory.h" | 8 #include "src/factory.h" |
9 #include "src/isolate.h" | 9 #include "src/isolate.h" |
10 #include "src/objects-inl.h" | 10 #include "src/objects-inl.h" |
11 #include "src/ostreams.h" | 11 #include "src/ostreams.h" |
12 #include "src/regexp/jsregexp.h" | 12 #include "src/regexp/jsregexp.h" |
13 #include "src/utils.h" | 13 #include "src/utils.h" |
14 | 14 |
15 #ifdef V8_I18N_SUPPORT | 15 #ifdef V8_I18N_SUPPORT |
16 #include "unicode/uset.h" | 16 #include "unicode/uset.h" |
17 #endif // V8_I18N_SUPPORT | 17 #endif // V8_I18N_SUPPORT |
18 | 18 |
19 namespace v8 { | 19 namespace v8 { |
20 namespace internal { | 20 namespace internal { |
21 | 21 |
22 RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error, | 22 RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error, |
23 JSRegExp::Flags flags, Isolate* isolate, Zone* zone) | 23 JSRegExp::Flags flags, Isolate* isolate, Zone* zone) |
24 : isolate_(isolate), | 24 : isolate_(isolate), |
25 zone_(zone), | 25 zone_(zone), |
26 error_(error), | 26 error_(error), |
27 captures_(NULL), | 27 captures_(NULL), |
28 named_captures_(NULL), | |
29 named_back_references_(NULL), | |
30 capture_strings_(0, zone), | |
28 in_(in), | 31 in_(in), |
29 current_(kEndMarker), | 32 current_(kEndMarker), |
30 ignore_case_(flags & JSRegExp::kIgnoreCase), | 33 ignore_case_(flags & JSRegExp::kIgnoreCase), |
31 multiline_(flags & JSRegExp::kMultiline), | 34 multiline_(flags & JSRegExp::kMultiline), |
32 unicode_(flags & JSRegExp::kUnicode), | 35 unicode_(flags & JSRegExp::kUnicode), |
33 next_pos_(0), | 36 next_pos_(0), |
34 captures_started_(0), | 37 captures_started_(0), |
35 capture_count_(0), | 38 capture_count_(0), |
36 has_more_(true), | 39 has_more_(true), |
37 simple_(false), | 40 simple_(false), |
(...skipping 104 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
142 | 145 |
143 #define CHECK_FAILED /**/); \ | 146 #define CHECK_FAILED /**/); \ |
144 if (failed_) return NULL; \ | 147 if (failed_) return NULL; \ |
145 ((void)0 | 148 ((void)0 |
146 | 149 |
147 | 150 |
148 // Pattern :: | 151 // Pattern :: |
149 // Disjunction | 152 // Disjunction |
150 RegExpTree* RegExpParser::ParsePattern() { | 153 RegExpTree* RegExpParser::ParsePattern() { |
151 RegExpTree* result = ParseDisjunction(CHECK_FAILED); | 154 RegExpTree* result = ParseDisjunction(CHECK_FAILED); |
155 PatchNamedBackReferences(CHECK_FAILED); | |
152 DCHECK(!has_more()); | 156 DCHECK(!has_more()); |
153 // If the result of parsing is a literal string atom, and it has the | 157 // If the result of parsing is a literal string atom, and it has the |
154 // same length as the input, then the atom is identical to the input. | 158 // same length as the input, then the atom is identical to the input. |
155 if (result->IsAtom() && result->AsAtom()->length() == in()->length()) { | 159 if (result->IsAtom() && result->AsAtom()->length() == in()->length()) { |
156 simple_ = true; | 160 simple_ = true; |
157 } | 161 } |
158 return result; | 162 return result; |
159 } | 163 } |
160 | 164 |
161 | 165 |
(...skipping 99 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
261 new (zone()) ZoneList<CharacterRange>(2, zone()); | 265 new (zone()) ZoneList<CharacterRange>(2, zone()); |
262 CharacterRange::AddClassEscape('.', ranges, zone()); | 266 CharacterRange::AddClassEscape('.', ranges, zone()); |
263 RegExpCharacterClass* cc = | 267 RegExpCharacterClass* cc = |
264 new (zone()) RegExpCharacterClass(ranges, false); | 268 new (zone()) RegExpCharacterClass(ranges, false); |
265 builder->AddCharacterClass(cc); | 269 builder->AddCharacterClass(cc); |
266 break; | 270 break; |
267 } | 271 } |
268 case '(': { | 272 case '(': { |
269 SubexpressionType subexpr_type = CAPTURE; | 273 SubexpressionType subexpr_type = CAPTURE; |
270 RegExpLookaround::Type lookaround_type = state->lookaround_type(); | 274 RegExpLookaround::Type lookaround_type = state->lookaround_type(); |
275 bool is_named_capture = false; | |
271 Advance(); | 276 Advance(); |
272 if (current() == '?') { | 277 if (current() == '?') { |
273 switch (Next()) { | 278 switch (Next()) { |
274 case ':': | 279 case ':': |
275 subexpr_type = GROUPING; | 280 subexpr_type = GROUPING; |
281 Advance(2); | |
276 break; | 282 break; |
277 case '=': | 283 case '=': |
278 lookaround_type = RegExpLookaround::LOOKAHEAD; | 284 lookaround_type = RegExpLookaround::LOOKAHEAD; |
279 subexpr_type = POSITIVE_LOOKAROUND; | 285 subexpr_type = POSITIVE_LOOKAROUND; |
286 Advance(2); | |
280 break; | 287 break; |
281 case '!': | 288 case '!': |
282 lookaround_type = RegExpLookaround::LOOKAHEAD; | 289 lookaround_type = RegExpLookaround::LOOKAHEAD; |
283 subexpr_type = NEGATIVE_LOOKAROUND; | 290 subexpr_type = NEGATIVE_LOOKAROUND; |
291 Advance(2); | |
284 break; | 292 break; |
285 case '<': | 293 case '<': |
286 if (FLAG_harmony_regexp_lookbehind) { | 294 if (FLAG_harmony_regexp_lookbehind || |
295 FLAG_harmony_regexp_named_captures) { | |
Yang
2016/06/13 10:54:52
I don't think this check is still necessary. We ca
jgruber
2016/06/13 13:10:00
Done.
| |
287 Advance(); | 296 Advance(); |
288 lookaround_type = RegExpLookaround::LOOKBEHIND; | 297 if (FLAG_harmony_regexp_lookbehind) { |
289 if (Next() == '=') { | 298 if (Next() == '=') { |
290 subexpr_type = POSITIVE_LOOKAROUND; | 299 subexpr_type = POSITIVE_LOOKAROUND; |
291 break; | 300 lookaround_type = RegExpLookaround::LOOKBEHIND; |
292 } else if (Next() == '!') { | 301 Advance(2); |
293 subexpr_type = NEGATIVE_LOOKAROUND; | 302 break; |
303 } else if (Next() == '!') { | |
304 subexpr_type = NEGATIVE_LOOKAROUND; | |
305 lookaround_type = RegExpLookaround::LOOKBEHIND; | |
306 Advance(2); | |
307 break; | |
308 } | |
309 } | |
310 if (FLAG_harmony_regexp_named_captures && unicode()) { | |
311 is_named_capture = true; | |
312 Advance(); | |
294 break; | 313 break; |
295 } | 314 } |
296 } | 315 } |
297 // Fall through. | 316 // Fall through. |
298 default: | 317 default: |
299 return ReportError(CStrVector("Invalid group")); | 318 return ReportError(CStrVector("Invalid group")); |
300 } | 319 } |
301 Advance(2); | 320 } |
302 } else { | 321 |
322 if (subexpr_type == CAPTURE) { | |
303 if (captures_started_ >= kMaxCaptures) { | 323 if (captures_started_ >= kMaxCaptures) { |
304 return ReportError(CStrVector("Too many captures")); | 324 return ReportError(CStrVector("Too many captures")); |
305 } | 325 } |
306 captures_started_++; | 326 captures_started_++; |
327 | |
328 if (is_named_capture) { | |
329 const ZoneVector<uc16>* name = ParseCaptureGroupName(CHECK_FAILED); | |
330 CreateNamedCaptureAtIndex(name, captures_started_ CHECK_FAILED); | |
Yang
2016/06/13 10:54:52
Can we simply attach the name to the parser state
jgruber
2016/06/13 13:10:00
Done.
| |
331 } | |
307 } | 332 } |
308 // Store current state and begin new disjunction parsing. | 333 // Store current state and begin new disjunction parsing. |
309 state = new (zone()) RegExpParserState( | 334 state = new (zone()) RegExpParserState( |
310 state, subexpr_type, lookaround_type, captures_started_, | 335 state, subexpr_type, lookaround_type, captures_started_, |
311 ignore_case(), unicode(), zone()); | 336 ignore_case(), unicode(), zone()); |
312 builder = state->builder(); | 337 builder = state->builder(); |
313 continue; | 338 continue; |
314 } | 339 } |
315 case '[': { | 340 case '[': { |
316 RegExpTree* cc = ParseCharacterClass(CHECK_FAILED); | 341 RegExpTree* cc = ParseCharacterClass(CHECK_FAILED); |
(...skipping 173 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
490 if (ParseUnicodeEscape(&value)) { | 515 if (ParseUnicodeEscape(&value)) { |
491 builder->AddEscapedUnicodeCharacter(value); | 516 builder->AddEscapedUnicodeCharacter(value); |
492 } else if (!unicode()) { | 517 } else if (!unicode()) { |
493 builder->AddCharacter('u'); | 518 builder->AddCharacter('u'); |
494 } else { | 519 } else { |
495 // With /u, invalid escapes are not treated as identity escapes. | 520 // With /u, invalid escapes are not treated as identity escapes. |
496 return ReportError(CStrVector("Invalid unicode escape")); | 521 return ReportError(CStrVector("Invalid unicode escape")); |
497 } | 522 } |
498 break; | 523 break; |
499 } | 524 } |
525 case 'k': | |
526 if (FLAG_harmony_regexp_named_captures && unicode()) { | |
527 Advance(2); | |
528 ParseNamedBackReference(builder, state CHECK_FAILED); | |
529 break; | |
530 } | |
531 // FALLTHROUGH | |
Yang
2016/06/13 10:54:53
I don't think we need all caps here :)
Above we ha
jgruber
2016/06/13 13:10:00
Looking at the rest of the file, I think we have e
| |
500 default: | 532 default: |
501 Advance(); | 533 Advance(); |
502 // With /u, no identity escapes except for syntax characters | 534 // With /u, no identity escapes except for syntax characters |
503 // are allowed. Otherwise, all identity escapes are allowed. | 535 // are allowed. Otherwise, all identity escapes are allowed. |
504 if (!unicode() || IsSyntaxCharacterOrSlash(current())) { | 536 if (!unicode() || IsSyntaxCharacterOrSlash(current())) { |
505 builder->AddCharacter(current()); | 537 builder->AddCharacter(current()); |
506 Advance(); | 538 Advance(); |
507 } else { | 539 } else { |
508 return ReportError(CStrVector("Invalid escape")); | 540 return ReportError(CStrVector("Invalid escape")); |
509 } | 541 } |
(...skipping 158 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
668 } | 700 } |
669 if (value > capture_count_) { | 701 if (value > capture_count_) { |
670 Reset(start); | 702 Reset(start); |
671 return false; | 703 return false; |
672 } | 704 } |
673 } | 705 } |
674 *index_out = value; | 706 *index_out = value; |
675 return true; | 707 return true; |
676 } | 708 } |
677 | 709 |
710 class CaptureNameBuffer { | |
711 public: | |
712 explicit CaptureNameBuffer(Zone* zone) | |
713 : backing_store_(nullptr), zone_(zone) {} | |
714 | |
715 INLINE(void AddChar(uint32_t code_unit)) { | |
716 if (backing_store_ == nullptr) { | |
717 backing_store_ = | |
718 new (zone_->New(sizeof(ZoneVector<uc16>))) ZoneVector<uc16>(zone_); | |
719 } | |
720 if (code_unit <= unibrow::Utf16::kMaxNonSurrogateCharCode) { | |
721 backing_store_->push_back(code_unit); | |
722 } else { | |
723 backing_store_->push_back(unibrow::Utf16::LeadSurrogate(code_unit)); | |
724 backing_store_->push_back(unibrow::Utf16::TrailSurrogate(code_unit)); | |
725 } | |
726 } | |
727 | |
728 const ZoneVector<uc16>* two_byte_literal() { return backing_store_; } | |
729 | |
730 private: | |
731 ZoneVector<uc16>* backing_store_; | |
Yang
2016/06/13 10:54:52
Let's make this a non-dynamic member, like Bytecod
jgruber
2016/06/13 13:10:00
Done.
| |
732 Zone* zone_; | |
733 | |
734 DISALLOW_COPY_AND_ASSIGN(CaptureNameBuffer); | |
735 }; | |
736 | |
737 const ZoneVector<uc16>* RegExpParser::ParseCaptureGroupName() { | |
738 DCHECK(FLAG_harmony_regexp_named_captures); | |
739 DCHECK(unicode()); | |
740 | |
741 CaptureNameBuffer buf(zone()); | |
742 bool at_start = true; | |
743 while (true) { | |
744 uc32 c = current(); | |
745 Advance(); | |
746 | |
747 // Convert unicode escapes. | |
748 if (c == '\\' && current() == 'u') { | |
749 Advance(); | |
750 if (!ParseUnicodeEscape(&c)) { | |
751 ReportError(CStrVector("Invalid Unicode escape sequence")); | |
752 return nullptr; | |
753 } | |
754 } | |
755 | |
756 if (at_start) { | |
757 if (!IdentifierStart::Is(c)) { | |
758 ReportError(CStrVector("Invalid capture group name")); | |
759 return nullptr; | |
760 } | |
761 buf.AddChar(c); | |
762 at_start = false; | |
763 } else { | |
764 if (c == '>') { | |
765 break; | |
766 } else if (IdentifierPart::Is(c)) { | |
767 buf.AddChar(c); | |
768 } else { | |
769 ReportError(CStrVector("Invalid capture group name")); | |
770 return nullptr; | |
771 } | |
772 } | |
773 } | |
774 | |
775 return buf.two_byte_literal(); | |
776 } | |
777 | |
778 bool RegExpParser::CreateNamedCaptureAtIndex(const ZoneVector<uc16>* name, | |
779 int index) { | |
780 DCHECK(FLAG_harmony_regexp_named_captures); | |
781 DCHECK(unicode()); | |
782 DCHECK(0 < index && index <= captures_started_); | |
783 DCHECK_NOT_NULL(name); | |
784 | |
785 if (named_captures_ == nullptr) { | |
786 named_captures_ = new (zone()) ZoneList<RegExpCapture*>(1, zone()); | |
Yang
2016/06/13 10:54:52
Let's make named_captures_ a non-dynamic member of
jgruber
2016/06/13 13:10:00
Do you have an intuition about how much overhead i
Yang
2016/06/13 13:38:00
Not a lot. List takes 3 pointers, so dynamic alloc
jgruber
2016/06/14 07:53:12
Ok. I'll stick with dynamic lists for now, just to
| |
787 } else { | |
788 // Check for duplicates and bail if we find any. | |
789 for (int i = 0; i < named_captures_->length(); i++) { | |
Yang
2016/06/13 10:54:52
You can use C++11 syntax here.
for (const auto& n
jgruber
2016/06/13 13:10:00
Done.
| |
790 if (*named_captures_->at(i)->name() == *name) { | |
791 ReportError(CStrVector("Duplicate capture group name")); | |
792 return false; | |
793 } | |
794 } | |
795 } | |
796 | |
797 RegExpCapture* capture = GetCapture(index); | |
798 DCHECK(capture->name() == nullptr); | |
799 | |
800 capture->set_name(name); | |
801 named_captures_->Add(capture, zone()); | |
802 | |
803 return true; | |
804 } | |
805 | |
806 bool RegExpParser::ParseNamedBackReference(RegExpBuilder* builder, | |
807 RegExpParserState* state) { | |
808 // The parser is assumed to be on the '<' in \k<name>. | |
809 if (current() != '<') { | |
810 ReportError(CStrVector("Invalid named reference")); | |
811 return false; | |
812 } | |
813 | |
814 Advance(); | |
815 const ZoneVector<uc16>* name = ParseCaptureGroupName(); | |
816 if (name == nullptr) { | |
817 return false; | |
818 } | |
819 | |
820 const int index = LookupCaptureGroupIndex(name); | |
Yang
2016/06/13 10:54:53
Let's not do this twice, here and in PatchNamedBac
jgruber
2016/06/13 13:10:00
We needed the index here to determine whether to c
| |
821 if (index != -1 && state->IsInsideCaptureGroup(index)) { | |
822 builder->AddEmpty(); | |
823 } else { | |
824 RegExpBackReference* atom = new (zone()) RegExpBackReference(); | |
825 atom->set_name(name); | |
826 | |
827 builder->AddAtom(atom); | |
828 | |
829 if (named_back_references_ == nullptr) { | |
830 named_back_references_ = | |
Yang
2016/06/13 10:54:53
Same here, let's make named_back_references_ a non
jgruber
2016/06/13 13:10:00
See above.
This change is trivial, just want to m
| |
831 new (zone()) ZoneList<RegExpBackReference*>(1, zone()); | |
832 } | |
833 named_back_references_->Add(atom, zone()); | |
834 } | |
835 | |
836 return true; | |
837 } | |
838 | |
839 void RegExpParser::PatchNamedBackReferences() { | |
840 if (named_back_references_ == nullptr) return; | |
841 | |
842 if (named_captures_ == nullptr) { | |
843 ReportError(CStrVector("Invalid named capture referenced")); | |
844 return; | |
845 } | |
846 | |
847 // Look up and patch the actual capture for each named back reference. | |
848 // TODO(jgruber): O(n^2), optimize if necessary. | |
849 | |
850 for (int i = 0; i < named_back_references_->length(); i++) { | |
851 RegExpBackReference* ref = named_back_references_->at(i); | |
852 int index = LookupCaptureGroupIndex(ref->name()); | |
853 if (index == -1) { | |
854 ReportError(CStrVector("Invalid named capture referenced")); | |
855 return; | |
856 } | |
857 ref->set_capture(GetCapture(index)); | |
858 } | |
859 } | |
860 | |
861 int RegExpParser::LookupCaptureGroupIndex(const ZoneVector<uc16>* name) { | |
Yang
2016/06/13 10:54:52
This can be inlined into PatchNamedBackReferences
jgruber
2016/06/13 13:10:00
Done.
| |
862 DCHECK(FLAG_harmony_regexp_named_captures); | |
863 DCHECK(unicode()); | |
864 DCHECK_NOT_NULL(name); | |
865 | |
866 // Attempt an initial lookup. | |
867 if (named_captures_ == nullptr) { | |
868 return -1; | |
869 } | |
870 | |
871 for (int i = 0; i < named_captures_->length(); i++) { | |
872 RegExpCapture* capture = named_captures_->at(i); | |
873 if (*capture->name() == *name) { | |
874 return capture->index(); | |
875 } | |
876 } | |
877 | |
878 return -1; | |
879 } | |
678 | 880 |
679 RegExpCapture* RegExpParser::GetCapture(int index) { | 881 RegExpCapture* RegExpParser::GetCapture(int index) { |
680 // The index for the capture groups are one-based. Its index in the list is | 882 // The index for the capture groups are one-based. Its index in the list is |
681 // zero-based. | 883 // zero-based. |
682 int know_captures = | 884 int know_captures = |
683 is_scanned_for_captures_ ? capture_count_ : captures_started_; | 885 is_scanned_for_captures_ ? capture_count_ : captures_started_; |
684 DCHECK(index <= know_captures); | 886 DCHECK(index <= know_captures); |
685 if (captures_ == NULL) { | 887 if (captures_ == NULL) { |
686 captures_ = new (zone()) ZoneList<RegExpCapture*>(know_captures, zone()); | 888 captures_ = new (zone()) ZoneList<RegExpCapture*>(know_captures, zone()); |
687 } | 889 } |
688 while (captures_->length() < know_captures) { | 890 while (captures_->length() < know_captures) { |
689 captures_->Add(new (zone()) RegExpCapture(captures_->length() + 1), zone()); | 891 captures_->Add(new (zone()) RegExpCapture(captures_->length() + 1), zone()); |
690 } | 892 } |
691 return captures_->at(index - 1); | 893 return captures_->at(index - 1); |
692 } | 894 } |
693 | 895 |
896 Handle<FixedArray> RegExpParser::CreateCaptureNameMap() { | |
897 if (named_captures_ == nullptr || named_captures_->is_empty()) | |
898 return Handle<FixedArray>(); | |
899 | |
900 int len = named_captures_->length() * 2; | |
901 Handle<FixedArray> array = isolate()->factory()->NewFixedArray(len); | |
902 | |
903 for (int i = 0; i < named_captures_->length(); i++) { | |
904 RegExpCapture* capture = named_captures_->at(i); | |
905 Vector<const uc16> vector(&(*capture->name())[0], | |
Yang
2016/06/13 10:54:53
Could we use a ZoneList for capture->name() instea
jgruber
2016/06/13 13:10:00
I used ZoneVector because of mstarzinger's comment
Yang
2016/06/13 13:38:00
I guess adding ToConstVector to ZoneVector also wo
jgruber
2016/06/14 07:53:12
Done.
| |
906 static_cast<int>(capture->name()->size())); | |
907 MaybeHandle<String> name = | |
908 isolate()->factory()->NewStringFromTwoByte(vector); | |
909 array->set(i * 2, *name.ToHandleChecked()); | |
910 array->set(i * 2 + 1, Smi::FromInt(capture->index())); | |
911 } | |
912 | |
913 return array; | |
914 } | |
915 | |
916 void RegExpParser::FreeCaptureStrings() { | |
Yang
2016/06/13 10:54:52
Do we still need this and capture_strings_?
jgruber
2016/06/13 13:10:00
No. Thanks, good catch.
| |
917 for (int i = 0; i < capture_strings_.length(); i++) { | |
918 capture_strings_[i].Dispose(); | |
919 } | |
920 capture_strings_.Clear(); | |
921 } | |
694 | 922 |
695 bool RegExpParser::RegExpParserState::IsInsideCaptureGroup(int index) { | 923 bool RegExpParser::RegExpParserState::IsInsideCaptureGroup(int index) { |
696 for (RegExpParserState* s = this; s != NULL; s = s->previous_state()) { | 924 for (RegExpParserState* s = this; s != NULL; s = s->previous_state()) { |
697 if (s->group_type() != CAPTURE) continue; | 925 if (s->group_type() != CAPTURE) continue; |
698 // Return true if we found the matching capture index. | 926 // Return true if we found the matching capture index. |
699 if (index == s->capture_index()) return true; | 927 if (index == s->capture_index()) return true; |
700 // Abort if index is larger than what has been parsed up till this state. | 928 // Abort if index is larger than what has been parsed up till this state. |
701 if (index > s->capture_index()) return false; | 929 if (index > s->capture_index()) return false; |
702 } | 930 } |
703 return false; | 931 return false; |
(...skipping 424 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1128 default: | 1356 default: |
1129 first = ParseClassCharacterEscape(CHECK_FAILED); | 1357 first = ParseClassCharacterEscape(CHECK_FAILED); |
1130 } | 1358 } |
1131 } else { | 1359 } else { |
1132 Advance(); | 1360 Advance(); |
1133 } | 1361 } |
1134 | 1362 |
1135 return CharacterRange::Singleton(first); | 1363 return CharacterRange::Singleton(first); |
1136 } | 1364 } |
1137 | 1365 |
1138 | |
1139 static const uc16 kNoCharClass = 0; | 1366 static const uc16 kNoCharClass = 0; |
1140 | 1367 |
1141 // Adds range or pre-defined character class to character ranges. | 1368 // Adds range or pre-defined character class to character ranges. |
1142 // If char_class is not kNoCharClass, it's interpreted as a class | 1369 // If char_class is not kNoCharClass, it's interpreted as a class |
1143 // escape (i.e., 's' means whitespace, from '\s'). | 1370 // escape (i.e., 's' means whitespace, from '\s'). |
1144 static inline void AddRangeOrEscape(ZoneList<CharacterRange>* ranges, | 1371 static inline void AddRangeOrEscape(ZoneList<CharacterRange>* ranges, |
1145 uc16 char_class, CharacterRange range, | 1372 uc16 char_class, CharacterRange range, |
1146 Zone* zone) { | 1373 Zone* zone) { |
1147 if (char_class != kNoCharClass) { | 1374 if (char_class != kNoCharClass) { |
1148 CharacterRange::AddClassEscape(char_class, ranges, zone); | 1375 CharacterRange::AddClassEscape(char_class, ranges, zone); |
(...skipping 112 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1261 DCHECK(result->error.is_null()); | 1488 DCHECK(result->error.is_null()); |
1262 if (FLAG_trace_regexp_parser) { | 1489 if (FLAG_trace_regexp_parser) { |
1263 OFStream os(stdout); | 1490 OFStream os(stdout); |
1264 tree->Print(os, zone); | 1491 tree->Print(os, zone); |
1265 os << "\n"; | 1492 os << "\n"; |
1266 } | 1493 } |
1267 result->tree = tree; | 1494 result->tree = tree; |
1268 int capture_count = parser.captures_started(); | 1495 int capture_count = parser.captures_started(); |
1269 result->simple = tree->IsAtom() && parser.simple() && capture_count == 0; | 1496 result->simple = tree->IsAtom() && parser.simple() && capture_count == 0; |
1270 result->contains_anchor = parser.contains_anchor(); | 1497 result->contains_anchor = parser.contains_anchor(); |
1498 result->capture_name_map = parser.CreateCaptureNameMap(); | |
1271 result->capture_count = capture_count; | 1499 result->capture_count = capture_count; |
1272 } | 1500 } |
1501 parser.FreeCaptureStrings(); | |
1273 return !parser.failed(); | 1502 return !parser.failed(); |
1274 } | 1503 } |
1275 | 1504 |
1276 RegExpBuilder::RegExpBuilder(Zone* zone, bool ignore_case, bool unicode) | 1505 RegExpBuilder::RegExpBuilder(Zone* zone, bool ignore_case, bool unicode) |
1277 : zone_(zone), | 1506 : zone_(zone), |
1278 pending_empty_(false), | 1507 pending_empty_(false), |
1279 ignore_case_(ignore_case), | 1508 ignore_case_(ignore_case), |
1280 unicode_(unicode), | 1509 unicode_(unicode), |
1281 characters_(NULL), | 1510 characters_(NULL), |
1282 pending_surrogate_(kNoPendingSurrogate), | 1511 pending_surrogate_(kNoPendingSurrogate), |
(...skipping 274 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1557 return false; | 1786 return false; |
1558 } | 1787 } |
1559 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), | 1788 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), |
1560 zone()); | 1789 zone()); |
1561 LAST(ADD_TERM); | 1790 LAST(ADD_TERM); |
1562 return true; | 1791 return true; |
1563 } | 1792 } |
1564 | 1793 |
1565 } // namespace internal | 1794 } // namespace internal |
1566 } // namespace v8 | 1795 } // namespace v8 |
OLD | NEW |