src/regexp/regexp-parser.cc - Issue 2050343002: [regexp] Experimental support for regexp named captures

Side by Side Diff: src/regexp/regexp-parser.cc

Issue 2050343002: [regexp] Experimental support for regexp named captures (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master

Patch Set: static_cast<int> Created 4 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright 2016 the V8 project authors. All rights reserved.	1 // Copyright 2016 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "src/regexp/regexp-parser.h"	5 #include "src/regexp/regexp-parser.h"

6	6

7 #include "src/char-predicates-inl.h"	7 #include "src/char-predicates-inl.h"

8 #include "src/factory.h"	8 #include "src/factory.h"

9 #include "src/isolate.h"	9 #include "src/isolate.h"

10 #include "src/objects-inl.h"	10 #include "src/objects-inl.h"

11 #include "src/ostreams.h"	11 #include "src/ostreams.h"

12 #include "src/regexp/jsregexp.h"	12 #include "src/regexp/jsregexp.h"

13 #include "src/utils.h"	13 #include "src/utils.h"

14	14

15 #ifdef V8_I18N_SUPPORT	15 #ifdef V8_I18N_SUPPORT

16 #include "unicode/uset.h"	16 #include "unicode/uset.h"

17 #endif // V8_I18N_SUPPORT	17 #endif // V8_I18N_SUPPORT

18	18

19 namespace v8 {	19 namespace v8 {

20 namespace internal {	20 namespace internal {

21	21

22 RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error,	22 RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error,

23 JSRegExp::Flags flags, Isolate* isolate, Zone* zone)	23 JSRegExp::Flags flags, Isolate* isolate, Zone* zone)

24 : isolate_(isolate),	24 : isolate_(isolate),

25 zone_(zone),	25 zone_(zone),

26 error_(error),	26 error_(error),

27 captures_(NULL),	27 captures_(NULL),

	28 named_captures_(NULL),

	29 named_back_references_(NULL),

	30 capture_strings_(0, zone),

28 in_(in),	31 in_(in),

29 current_(kEndMarker),	32 current_(kEndMarker),

30 ignore_case_(flags & JSRegExp::kIgnoreCase),	33 ignore_case_(flags & JSRegExp::kIgnoreCase),

31 multiline_(flags & JSRegExp::kMultiline),	34 multiline_(flags & JSRegExp::kMultiline),

32 unicode_(flags & JSRegExp::kUnicode),	35 unicode_(flags & JSRegExp::kUnicode),

33 next_pos_(0),	36 next_pos_(0),

34 captures_started_(0),	37 captures_started_(0),

35 capture_count_(0),	38 capture_count_(0),

36 has_more_(true),	39 has_more_(true),

37 simple_(false),	40 simple_(false),

(...skipping 104 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
142	145

143 #define CHECK_FAILED /**/); \	146 #define CHECK_FAILED /**/); \

144 if (failed_) return NULL; \	147 if (failed_) return NULL; \

145 ((void)0	148 ((void)0

146	149

147	150

148 // Pattern ::	151 // Pattern ::

149 // Disjunction	152 // Disjunction

150 RegExpTree* RegExpParser::ParsePattern() {	153 RegExpTree* RegExpParser::ParsePattern() {

151 RegExpTree* result = ParseDisjunction(CHECK_FAILED);	154 RegExpTree* result = ParseDisjunction(CHECK_FAILED);

	155 PatchNamedBackReferences(CHECK_FAILED);

152 DCHECK(!has_more());	156 DCHECK(!has_more());

153 // If the result of parsing is a literal string atom, and it has the	157 // If the result of parsing is a literal string atom, and it has the

154 // same length as the input, then the atom is identical to the input.	158 // same length as the input, then the atom is identical to the input.

155 if (result->IsAtom() && result->AsAtom()->length() == in()->length()) {	159 if (result->IsAtom() && result->AsAtom()->length() == in()->length()) {

156 simple_ = true;	160 simple_ = true;

157 }	161 }

158 return result;	162 return result;

159 }	163 }

160	164

161	165

(...skipping 99 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
261 new (zone()) ZoneList<CharacterRange>(2, zone());	265 new (zone()) ZoneList<CharacterRange>(2, zone());

262 CharacterRange::AddClassEscape('.', ranges, zone());	266 CharacterRange::AddClassEscape('.', ranges, zone());

263 RegExpCharacterClass* cc =	267 RegExpCharacterClass* cc =

264 new (zone()) RegExpCharacterClass(ranges, false);	268 new (zone()) RegExpCharacterClass(ranges, false);

265 builder->AddCharacterClass(cc);	269 builder->AddCharacterClass(cc);

266 break;	270 break;

267 }	271 }

268 case '(': {	272 case '(': {

269 SubexpressionType subexpr_type = CAPTURE;	273 SubexpressionType subexpr_type = CAPTURE;

270 RegExpLookaround::Type lookaround_type = state->lookaround_type();	274 RegExpLookaround::Type lookaround_type = state->lookaround_type();

	275 bool is_named_capture = false;

271 Advance();	276 Advance();

272 if (current() == '?') {	277 if (current() == '?') {

273 switch (Next()) {	278 switch (Next()) {

274 case ':':	279 case ':':

275 subexpr_type = GROUPING;	280 subexpr_type = GROUPING;

	281 Advance(2);

276 break;	282 break;

277 case '=':	283 case '=':

278 lookaround_type = RegExpLookaround::LOOKAHEAD;	284 lookaround_type = RegExpLookaround::LOOKAHEAD;

279 subexpr_type = POSITIVE_LOOKAROUND;	285 subexpr_type = POSITIVE_LOOKAROUND;

	286 Advance(2);

280 break;	287 break;

281 case '!':	288 case '!':

282 lookaround_type = RegExpLookaround::LOOKAHEAD;	289 lookaround_type = RegExpLookaround::LOOKAHEAD;

283 subexpr_type = NEGATIVE_LOOKAROUND;	290 subexpr_type = NEGATIVE_LOOKAROUND;

	291 Advance(2);

284 break;	292 break;

285 case '<':	293 case '<':

286 if (FLAG_harmony_regexp_lookbehind) {	294 if (FLAG_harmony_regexp_lookbehind ||

	295 FLAG_harmony_regexp_named_captures) {
	Yang 2016/06/13 10:54:52
	I don't think this check is still necessary. We can just always Advance. If none of the flags below are set, we can just fall through and fail parsing.
	jgruber 2016/06/13 13:10:00
	On 2016/06/13 10:54:52, Yang wrote:
	> I don't think this check is still necessary. We can just always Advance. If none
	> of the flags below are set, we can just fall through and fail parsing.
	Done.
287 Advance();	296 Advance();

288 lookaround_type = RegExpLookaround::LOOKBEHIND;	297 if (FLAG_harmony_regexp_lookbehind) {

289 if (Next() == '=') {	298 if (Next() == '=') {

290 subexpr_type = POSITIVE_LOOKAROUND;	299 subexpr_type = POSITIVE_LOOKAROUND;

291 break;	300 lookaround_type = RegExpLookaround::LOOKBEHIND;

292 } else if (Next() == '!') {	301 Advance(2);

293 subexpr_type = NEGATIVE_LOOKAROUND;	302 break;

	303 } else if (Next() == '!') {

	304 subexpr_type = NEGATIVE_LOOKAROUND;

	305 lookaround_type = RegExpLookaround::LOOKBEHIND;

	306 Advance(2);

	307 break;

	308 }

	309 }

	310 if (FLAG_harmony_regexp_named_captures && unicode()) {

	311 is_named_capture = true;

	312 Advance();

294 break;	313 break;

295 }	314 }

296 }	315 }

297 // Fall through.	316 // Fall through.

298 default:	317 default:

299 return ReportError(CStrVector("Invalid group"));	318 return ReportError(CStrVector("Invalid group"));

300 }	319 }

301 Advance(2);	320 }

302 } else {	321

	322 if (subexpr_type == CAPTURE) {

303 if (captures_started_ >= kMaxCaptures) {	323 if (captures_started_ >= kMaxCaptures) {

304 return ReportError(CStrVector("Too many captures"));	324 return ReportError(CStrVector("Too many captures"));

305 }	325 }

306 captures_started_++;	326 captures_started_++;

	327

	328 if (is_named_capture) {

	329 const ZoneVector<uc16>* name = ParseCaptureGroupName(CHECK_FAILED);

	330 CreateNamedCaptureAtIndex(name, captures_started_ CHECK_FAILED);
	Yang 2016/06/13 10:54:52
	Can we simply attach the name to the parser state here and create the capture group later with the name at the place where we call GetCapture? That way we don't add another place where we create capture groups.
	jgruber 2016/06/13 13:10:00
	On 2016/06/13 10:54:52, Yang wrote:
	> Can we simply attach the name to the parser state here and create the capture
	> group later with the name at the place where we call GetCapture? That way we
	> don't add another place where we create capture groups.
	Done.
	331 }

307 }	332 }

308 // Store current state and begin new disjunction parsing.	333 // Store current state and begin new disjunction parsing.

309 state = new (zone()) RegExpParserState(	334 state = new (zone()) RegExpParserState(

310 state, subexpr_type, lookaround_type, captures_started_,	335 state, subexpr_type, lookaround_type, captures_started_,

311 ignore_case(), unicode(), zone());	336 ignore_case(), unicode(), zone());

312 builder = state->builder();	337 builder = state->builder();

313 continue;	338 continue;

314 }	339 }

315 case '[': {	340 case '[': {

316 RegExpTree* cc = ParseCharacterClass(CHECK_FAILED);	341 RegExpTree* cc = ParseCharacterClass(CHECK_FAILED);

(...skipping 173 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
490 if (ParseUnicodeEscape(&value)) {	515 if (ParseUnicodeEscape(&value)) {

491 builder->AddEscapedUnicodeCharacter(value);	516 builder->AddEscapedUnicodeCharacter(value);

492 } else if (!unicode()) {	517 } else if (!unicode()) {

493 builder->AddCharacter('u');	518 builder->AddCharacter('u');

494 } else {	519 } else {

495 // With /u, invalid escapes are not treated as identity escapes.	520 // With /u, invalid escapes are not treated as identity escapes.

496 return ReportError(CStrVector("Invalid unicode escape"));	521 return ReportError(CStrVector("Invalid unicode escape"));

497 }	522 }

498 break;	523 break;

499 }	524 }

	525 case 'k':

	526 if (FLAG_harmony_regexp_named_captures && unicode()) {

	527 Advance(2);

	528 ParseNamedBackReference(builder, state CHECK_FAILED);

	529 break;

	530 }

	531 // FALLTHROUGH
	Yang 2016/06/13 10:54:53
	I don't think we need all caps here :) Above we have "Fall through." as comment.
	jgruber 2016/06/13 13:10:00
	On 2016/06/13 10:54:53, Yang wrote:
	> I don't think we need all caps here :)
	> Above we have "Fall through." as comment.
	Looking at the rest of the file, I think we have every variation of capitalization and white space possible. There actually is a second instance of FALLTHROUGH right above this, which is what I tried to stay consistent with (didn't notice the others). I can go through in a follow-up commit and make sure we use a single style :)
500 default:	532 default:

501 Advance();	533 Advance();

502 // With /u, no identity escapes except for syntax characters	534 // With /u, no identity escapes except for syntax characters

503 // are allowed. Otherwise, all identity escapes are allowed.	535 // are allowed. Otherwise, all identity escapes are allowed.

504 if (!unicode() || IsSyntaxCharacterOrSlash(current())) {	536 if (!unicode() || IsSyntaxCharacterOrSlash(current())) {

505 builder->AddCharacter(current());	537 builder->AddCharacter(current());

506 Advance();	538 Advance();

507 } else {	539 } else {

508 return ReportError(CStrVector("Invalid escape"));	540 return ReportError(CStrVector("Invalid escape"));

509 }	541 }

(...skipping 158 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
668 }	700 }

669 if (value > capture_count_) {	701 if (value > capture_count_) {

670 Reset(start);	702 Reset(start);

671 return false;	703 return false;

672 }	704 }

673 }	705 }

674 *index_out = value;	706 *index_out = value;

675 return true;	707 return true;

676 }	708 }

677	709

	710 class CaptureNameBuffer {

	711 public:

	712 explicit CaptureNameBuffer(Zone* zone)

	713 : backing_store_(nullptr), zone_(zone) {}

	714

	715 INLINE(void AddChar(uint32_t code_unit)) {

	716 if (backing_store_ == nullptr) {

	717 backing_store_ =

	718 new (zone_->New(sizeof(ZoneVector<uc16>))) ZoneVector<uc16>(zone_);

	719 }

	720 if (code_unit <= unibrow::Utf16::kMaxNonSurrogateCharCode) {

	721 backing_store_->push_back(code_unit);

	722 } else {

	723 backing_store_->push_back(unibrow::Utf16::LeadSurrogate(code_unit));

	724 backing_store_->push_back(unibrow::Utf16::TrailSurrogate(code_unit));

	725 }

	726 }

	727

	728 const ZoneVector<uc16>* two_byte_literal() { return backing_store_; }

	729

	730 private:

	731 ZoneVector<uc16>* backing_store_;
	Yang 2016/06/13 10:54:52
	Let's make this a non-dynamic member, like BytecodeArrayWriter::bytecodes_, and initialize it eagerly in the constructor's initializer list. In fact, we probably can avoid this whole class and just inline the surrogate encoding into ParseCaptureGroupName and define the ZoneVector there.
	jgruber 2016/06/13 13:10:00
	On 2016/06/13 10:54:52, Yang wrote:
	> Let's make this a non-dynamic member, like BytecodeArrayWriter::bytecodes_, and
	> initialize it eagerly in the constructor's initializer list.
	>
	> In fact, we probably can avoid this whole class and just inline the surrogate
	> encoding into ParseCaptureGroupName and define the ZoneVector there.
	Done.
	732 Zone* zone_;

	733

	734 DISALLOW_COPY_AND_ASSIGN(CaptureNameBuffer);

	735 };

	736

	737 const ZoneVector<uc16>* RegExpParser::ParseCaptureGroupName() {

	738 DCHECK(FLAG_harmony_regexp_named_captures);

	739 DCHECK(unicode());

	740

	741 CaptureNameBuffer buf(zone());

	742 bool at_start = true;

	743 while (true) {

	744 uc32 c = current();

	745 Advance();

	746

	747 // Convert unicode escapes.

	748 if (c == '\\' && current() == 'u') {

	749 Advance();

	750 if (!ParseUnicodeEscape(&c)) {

	751 ReportError(CStrVector("Invalid Unicode escape sequence"));

	752 return nullptr;

	753 }

	754 }

	755

	756 if (at_start) {

	757 if (!IdentifierStart::Is(c)) {

	758 ReportError(CStrVector("Invalid capture group name"));

	759 return nullptr;

	760 }

	761 buf.AddChar(c);

	762 at_start = false;

	763 } else {

	764 if (c == '>') {

	765 break;

	766 } else if (IdentifierPart::Is(c)) {

	767 buf.AddChar(c);

	768 } else {

	769 ReportError(CStrVector("Invalid capture group name"));

	770 return nullptr;

	771 }

	772 }

	773 }

	774

	775 return buf.two_byte_literal();

	776 }

	777

	778 bool RegExpParser::CreateNamedCaptureAtIndex(const ZoneVector<uc16>* name,

	779 int index) {

	780 DCHECK(FLAG_harmony_regexp_named_captures);

	781 DCHECK(unicode());

	782 DCHECK(0 < index && index <= captures_started_);

	783 DCHECK_NOT_NULL(name);

	784

	785 if (named_captures_ == nullptr) {

	786 named_captures_ = new (zone()) ZoneList<RegExpCapture*>(1, zone());
	Yang 2016/06/13 10:54:52
	Let's make named_captures_ a non-dynamic member of RegExpParser.
	jgruber 2016/06/13 13:10:00
	On 2016/06/13 10:54:52, Yang wrote:
	> Let's make named_captures_ a non-dynamic member of RegExpParser.
	Do you have an intuition about how much overhead initialization of named_captures_ and named_back_references_ adds to the case of regexps without named captures? The captures_ list itself is also dynamic, and I assumed it was due to performance reasons.
	Yang 2016/06/13 13:38:00
	On 2016/06/13 13:10:00, jgruber wrote:
	> On 2016/06/13 10:54:52, Yang wrote:
	> > Let's make named_captures_ a non-dynamic member of RegExpParser.
	>
	> Do you have an intuition about how much overhead initialization of
	> named_captures_ and named_back_references_ adds to the case of regexps without
	> named captures?
	>
	> The captures_ list itself is also dynamic, and I assumed it was due to
	> performance reasons.
	Not a lot. List takes 3 pointers, so dynamic allocation would take 1 or 4 depending on whether its initialized. If we don't allocate dynamically, we also save some code. I don't have strong opinion on this, and can be convinced either way.
	jgruber 2016/06/14 07:53:12
	On 2016/06/13 13:38:00, Yang wrote:
	> On 2016/06/13 13:10:00, jgruber wrote:
	> > On 2016/06/13 10:54:52, Yang wrote:
	> > > Let's make named_captures_ a non-dynamic member of RegExpParser.
	> >
	> > Do you have an intuition about how much overhead initialization of
	> > named_captures_ and named_back_references_ adds to the case of regexps without
	> > named captures?
	> >
	> > The captures_ list itself is also dynamic, and I assumed it was due to
	> > performance reasons.
	>
	> Not a lot. List takes 3 pointers, so dynamic allocation would take 1 or 4
	> depending on whether its initialized. If we don't allocate dynamically, we also
	> save some code.
	>
	> I don't have strong opinion on this, and can be convinced either way.
	Ok. I'll stick with dynamic lists for now, just to stay consistent to captures_ and to avoid object creation for patterns without named captures.
	787 } else {

	788 // Check for duplicates and bail if we find any.

	789 for (int i = 0; i < named_captures_->length(); i++) {
	Yang 2016/06/13 10:54:52
	You can use C++11 syntax here. for (const auto& named_capture : *named_captures_)
	jgruber 2016/06/13 13:10:00
	On 2016/06/13 10:54:52, Yang wrote:
	> You can use C++11 syntax here.
	>
	> for (const auto& named_capture : *named_captures_)
	Done.
	790 if (named_captures_->at(i)->name() == name) {

	791 ReportError(CStrVector("Duplicate capture group name"));

	792 return false;

	793 }

	794 }

	795 }

	796

	797 RegExpCapture* capture = GetCapture(index);

	798 DCHECK(capture->name() == nullptr);

	799

	800 capture->set_name(name);

	801 named_captures_->Add(capture, zone());

	802

	803 return true;

	804 }

	805

	806 bool RegExpParser::ParseNamedBackReference(RegExpBuilder* builder,

	807 RegExpParserState* state) {

	808 // The parser is assumed to be on the '<' in \k<name>.

	809 if (current() != '<') {

	810 ReportError(CStrVector("Invalid named reference"));

	811 return false;

	812 }

	813

	814 Advance();

	815 const ZoneVector<uc16>* name = ParseCaptureGroupName();

	816 if (name == nullptr) {

	817 return false;

	818 }

	819

	820 const int index = LookupCaptureGroupIndex(name);
	Yang 2016/06/13 10:54:53
	Let's not do this twice, here and in PatchNamedBackReferences. Let's always do this in the latter.
	jgruber 2016/06/13 13:10:00
	On 2016/06/13 10:54:53, Yang wrote:
	> Let's not do this twice, here and in PatchNamedBackReferences. Let's always do
	> this in the latter.
	We needed the index here to determine whether to call AddEmpty() or add a back reference node. But since we now store the name in state, we can skip this lookup.
	821 if (index != -1 && state->IsInsideCaptureGroup(index)) {

	822 builder->AddEmpty();

	823 } else {

	824 RegExpBackReference* atom = new (zone()) RegExpBackReference();

	825 atom->set_name(name);

	826

	827 builder->AddAtom(atom);

	828

	829 if (named_back_references_ == nullptr) {

	830 named_back_references_ =
	Yang 2016/06/13 10:54:53
	Same here, let's make named_back_references_ a non-dynamic member.
	jgruber 2016/06/13 13:10:00
	On 2016/06/13 10:54:53, Yang wrote:
	> Same here, let's make named_back_references_ a non-dynamic member.
	See above. This change is trivial, just want to make sure it's the right way to go :)
	831 new (zone()) ZoneList<RegExpBackReference*>(1, zone());

	832 }

	833 named_back_references_->Add(atom, zone());

	834 }

	835

	836 return true;

	837 }

	838

	839 void RegExpParser::PatchNamedBackReferences() {

	840 if (named_back_references_ == nullptr) return;

	841

	842 if (named_captures_ == nullptr) {

	843 ReportError(CStrVector("Invalid named capture referenced"));

	844 return;

	845 }

	846

	847 // Look up and patch the actual capture for each named back reference.

	848 // TODO(jgruber): O(n^2), optimize if necessary.

	849

	850 for (int i = 0; i < named_back_references_->length(); i++) {

	851 RegExpBackReference* ref = named_back_references_->at(i);

	852 int index = LookupCaptureGroupIndex(ref->name());

	853 if (index == -1) {

	854 ReportError(CStrVector("Invalid named capture referenced"));

	855 return;

	856 }

	857 ref->set_capture(GetCapture(index));

	858 }

	859 }

	860

	861 int RegExpParser::LookupCaptureGroupIndex(const ZoneVector<uc16>* name) {
	Yang 2016/06/13 10:54:52
	This can be inlined into PatchNamedBackReferences if this is only called from there.
	jgruber 2016/06/13 13:10:00
	On 2016/06/13 10:54:52, Yang wrote:
	> This can be inlined into PatchNamedBackReferences if this is only called from
	> there.
	Done.
	862 DCHECK(FLAG_harmony_regexp_named_captures);

	863 DCHECK(unicode());

	864 DCHECK_NOT_NULL(name);

	865

	866 // Attempt an initial lookup.

	867 if (named_captures_ == nullptr) {

	868 return -1;

	869 }

	870

	871 for (int i = 0; i < named_captures_->length(); i++) {

	872 RegExpCapture* capture = named_captures_->at(i);

	873 if (capture->name() == name) {

	874 return capture->index();

	875 }

	876 }

	877

	878 return -1;

	879 }

678	880

679 RegExpCapture* RegExpParser::GetCapture(int index) {	881 RegExpCapture* RegExpParser::GetCapture(int index) {

680 // The index for the capture groups are one-based. Its index in the list is	882 // The index for the capture groups are one-based. Its index in the list is

681 // zero-based.	883 // zero-based.

682 int know_captures =	884 int know_captures =

683 is_scanned_for_captures_ ? capture_count_ : captures_started_;	885 is_scanned_for_captures_ ? capture_count_ : captures_started_;

684 DCHECK(index <= know_captures);	886 DCHECK(index <= know_captures);

685 if (captures_ == NULL) {	887 if (captures_ == NULL) {

686 captures_ = new (zone()) ZoneList<RegExpCapture*>(know_captures, zone());	888 captures_ = new (zone()) ZoneList<RegExpCapture*>(know_captures, zone());

687 }	889 }

688 while (captures_->length() < know_captures) {	890 while (captures_->length() < know_captures) {

689 captures_->Add(new (zone()) RegExpCapture(captures_->length() + 1), zone());	891 captures_->Add(new (zone()) RegExpCapture(captures_->length() + 1), zone());

690 }	892 }

691 return captures_->at(index - 1);	893 return captures_->at(index - 1);

692 }	894 }

693	895

	896 Handle<FixedArray> RegExpParser::CreateCaptureNameMap() {

	897 if (named_captures_ == nullptr || named_captures_->is_empty())

	898 return Handle<FixedArray>();

	899

	900 int len = named_captures_->length() * 2;

	901 Handle<FixedArray> array = isolate()->factory()->NewFixedArray(len);

	902

	903 for (int i = 0; i < named_captures_->length(); i++) {

	904 RegExpCapture* capture = named_captures_->at(i);

	905 Vector<const uc16> vector(&(*capture->name())[0],
	Yang 2016/06/13 10:54:53
	Could we use a ZoneList for capture->name() instead of ZoneVector? They essentially do the same anyways. With ZoneList, you can simply use ToConstVector.
	jgruber 2016/06/13 13:10:00
	On 2016/06/13 10:54:53, Yang wrote:
	> Could we use a ZoneList for capture->name() instead of ZoneVector? They
	> essentially do the same anyways. With ZoneList, you can simply use
	> ToConstVector.
	I used ZoneVector because of mstarzinger's comment '[...] once we got rid of our own home-grown ZoneList [...]' in zone-containers. I'm fine with either container type, but is there a real benefit to switching to ZoneVector? What about adding ToConstVector to ZoneVector?
	Yang 2016/06/13 13:38:00
	On 2016/06/13 13:10:00, jgruber wrote:
	> On 2016/06/13 10:54:53, Yang wrote:
	> > Could we use a ZoneList for capture->name() instead of ZoneVector? They
	> > essentially do the same anyways. With ZoneList, you can simply use
	> > ToConstVector.
	>
	> I used ZoneVector because of mstarzinger's comment '[...] once we got rid of our
	> own home-grown ZoneList [...]' in zone-containers.
	>
	> I'm fine with either container type, but is there a real benefit to switching to
	> ZoneVector? What about adding ToConstVector to ZoneVector?
	I guess adding ToConstVector to ZoneVector also works.
	jgruber 2016/06/14 07:53:12
	On 2016/06/13 13:38:00, Yang wrote:
	> On 2016/06/13 13:10:00, jgruber wrote:
	> > On 2016/06/13 10:54:53, Yang wrote:
	> > > Could we use a ZoneList for capture->name() instead of ZoneVector? They
	> > > essentially do the same anyways. With ZoneList, you can simply use
	> > > ToConstVector.
	> >
	> > I used ZoneVector because of mstarzinger's comment '[...] once we got rid of
	> our
	> > own home-grown ZoneList [...]' in zone-containers.
	> >
	> > I'm fine with either container type, but is there a real benefit to switching
	> to
	> > ZoneVector? What about adding ToConstVector to ZoneVector?
	>
	> I guess adding ToConstVector to ZoneVector also works.
	Done.
	906 static_cast<int>(capture->name()->size()));

	907 MaybeHandle<String> name =

	908 isolate()->factory()->NewStringFromTwoByte(vector);

	909 array->set(i * 2, *name.ToHandleChecked());

	910 array->set(i * 2 + 1, Smi::FromInt(capture->index()));

	911 }

	912

	913 return array;

	914 }

	915

	916 void RegExpParser::FreeCaptureStrings() {
	Yang 2016/06/13 10:54:52
	Do we still need this and capture_strings_?
	jgruber 2016/06/13 13:10:00
	On 2016/06/13 10:54:52, Yang wrote:
	> Do we still need this and capture_strings_?
	No. Thanks, good catch.
	917 for (int i = 0; i < capture_strings_.length(); i++) {

	918 capture_strings_[i].Dispose();

	919 }

	920 capture_strings_.Clear();

	921 }

694	922

695 bool RegExpParser::RegExpParserState::IsInsideCaptureGroup(int index) {	923 bool RegExpParser::RegExpParserState::IsInsideCaptureGroup(int index) {

696 for (RegExpParserState* s = this; s != NULL; s = s->previous_state()) {	924 for (RegExpParserState* s = this; s != NULL; s = s->previous_state()) {

697 if (s->group_type() != CAPTURE) continue;	925 if (s->group_type() != CAPTURE) continue;

698 // Return true if we found the matching capture index.	926 // Return true if we found the matching capture index.

699 if (index == s->capture_index()) return true;	927 if (index == s->capture_index()) return true;

700 // Abort if index is larger than what has been parsed up till this state.	928 // Abort if index is larger than what has been parsed up till this state.

701 if (index > s->capture_index()) return false;	929 if (index > s->capture_index()) return false;

702 }	930 }

703 return false;	931 return false;

(...skipping 424 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1128 default:	1356 default:

1129 first = ParseClassCharacterEscape(CHECK_FAILED);	1357 first = ParseClassCharacterEscape(CHECK_FAILED);

1130 }	1358 }

1131 } else {	1359 } else {

1132 Advance();	1360 Advance();

1133 }	1361 }

1134	1362

1135 return CharacterRange::Singleton(first);	1363 return CharacterRange::Singleton(first);

1136 }	1364 }

1137	1365

1138

1139 static const uc16 kNoCharClass = 0;	1366 static const uc16 kNoCharClass = 0;

1140	1367

1141 // Adds range or pre-defined character class to character ranges.	1368 // Adds range or pre-defined character class to character ranges.

1142 // If char_class is not kInvalidClass, it's interpreted as a class	1369 // If char_class is not kInvalidClass, it's interpreted as a class

1143 // escape (i.e., 's' means whitespace, from '\s').	1370 // escape (i.e., 's' means whitespace, from '\s').

1144 static inline void AddRangeOrEscape(ZoneList<CharacterRange>* ranges,	1371 static inline void AddRangeOrEscape(ZoneList<CharacterRange>* ranges,

1145 uc16 char_class, CharacterRange range,	1372 uc16 char_class, CharacterRange range,

1146 Zone* zone) {	1373 Zone* zone) {

1147 if (char_class != kNoCharClass) {	1374 if (char_class != kNoCharClass) {

1148 CharacterRange::AddClassEscape(char_class, ranges, zone);	1375 CharacterRange::AddClassEscape(char_class, ranges, zone);

(...skipping 112 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1261 DCHECK(result->error.is_null());	1488 DCHECK(result->error.is_null());

1262 if (FLAG_trace_regexp_parser) {	1489 if (FLAG_trace_regexp_parser) {

1263 OFStream os(stdout);	1490 OFStream os(stdout);

1264 tree->Print(os, zone);	1491 tree->Print(os, zone);

1265 os << "\n";	1492 os << "\n";

1266 }	1493 }

1267 result->tree = tree;	1494 result->tree = tree;

1268 int capture_count = parser.captures_started();	1495 int capture_count = parser.captures_started();

1269 result->simple = tree->IsAtom() && parser.simple() && capture_count == 0;	1496 result->simple = tree->IsAtom() && parser.simple() && capture_count == 0;

1270 result->contains_anchor = parser.contains_anchor();	1497 result->contains_anchor = parser.contains_anchor();

	1498 result->capture_name_map = parser.CreateCaptureNameMap();

1271 result->capture_count = capture_count;	1499 result->capture_count = capture_count;

1272 }	1500 }

	1501 parser.FreeCaptureStrings();

1273 return !parser.failed();	1502 return !parser.failed();

1274 }	1503 }

1275	1504

1276 RegExpBuilder::RegExpBuilder(Zone* zone, bool ignore_case, bool unicode)	1505 RegExpBuilder::RegExpBuilder(Zone* zone, bool ignore_case, bool unicode)

1277 : zone_(zone),	1506 : zone_(zone),

1278 pending_empty_(false),	1507 pending_empty_(false),

1279 ignore_case_(ignore_case),	1508 ignore_case_(ignore_case),

1280 unicode_(unicode),	1509 unicode_(unicode),

1281 characters_(NULL),	1510 characters_(NULL),

1282 pending_surrogate_(kNoPendingSurrogate),	1511 pending_surrogate_(kNoPendingSurrogate),

(...skipping 274 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1557 return false;	1786 return false;

1558 }	1787 }

1559 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom),	1788 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom),

1560 zone());	1789 zone());

1561 LAST(ADD_TERM);	1790 LAST(ADD_TERM);

1562 return true;	1791 return true;

1563 }	1792 }

1564	1793

1565 } // namespace internal	1794 } // namespace internal

1566 } // namespace v8	1795 } // namespace v8

OLD	NEW

« src/regexp/jsregexp.cc ('K') | « src/regexp/regexp-parser.h ('k') | src/runtime/runtime.h » ('j') | src/runtime/runtime-regexp.cc » ('J')