src/parser.cc - Issue 149069: Changed RegExp parser to use a recursive data structure instead of stack-based recursion.

Side by Side Diff: src/parser.cc

Issue 149069: Changed RegExp parser to use a recursive data structure instead of stack-based recursion. (Closed)

Patch Set: Removed static nonparticipating capture optimization. Not worth the complexity. Created 11 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved.	1 // Copyright 2006-2008 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 343 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
354 }	354 }

355 return list_;	355 return list_;

356 }	356 }

357	357

358 private:	358 private:

359 ZoneList<T> list_;	359 ZoneList<T> list_;

360 T* last_;	360 T* last_;

361 };	361 };

362	362

363 // Accumulates RegExp atoms and assertions into lists of terms and alternatives.	363 // Accumulates RegExp atoms and assertions into lists of terms and alternatives.

364 class RegExpBuilder {	364 class RegExpBuilder: public ZoneObject {

365 public:	365 public:

366 RegExpBuilder();	366 RegExpBuilder();

367 void AddCharacter(uc16 character);	367 void AddCharacter(uc16 character);

368 // "Adds" an empty expression. Does nothing except consume a	368 // "Adds" an empty expression. Does nothing except consume a

369 // following quantifier	369 // following quantifier

370 void AddEmpty();	370 void AddEmpty();

371 void AddAtom(RegExpTree* tree);	371 void AddAtom(RegExpTree* tree);

372 void AddAssertion(RegExpTree* tree);	372 void AddAssertion(RegExpTree* tree);

373 void NewAlternative(); // '\|'	373 void NewAlternative(); // '\|'

374 void AddQuantifierToAtom(int min, int max, bool is_greedy);	374 void AddQuantifierToAtom(int min, int max, bool is_greedy);

(...skipping 10 matching lines...) Expand all Loading...
385 #ifdef DEBUG	385 #ifdef DEBUG

386 enum {ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM} last_added_;	386 enum {ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM} last_added_;

387 #define LAST(x) last_added_ = x;	387 #define LAST(x) last_added_ = x;

388 #else	388 #else

389 #define LAST(x)	389 #define LAST(x)

390 #endif	390 #endif

391 };	391 };

392	392

393	393

394 RegExpBuilder::RegExpBuilder()	394 RegExpBuilder::RegExpBuilder()

395 : pending_empty_(false), characters_(NULL), terms_(), alternatives_()	395 : pending_empty_(false),

	396 characters_(NULL),

	397 terms_(),

	398 alternatives_()

396 #ifdef DEBUG	399 #ifdef DEBUG

397 , last_added_(ADD_NONE)	400 , last_added_(ADD_NONE)

398 #endif	401 #endif

399 {}	402 {}

400	403

401	404

402 void RegExpBuilder::FlushCharacters() {	405 void RegExpBuilder::FlushCharacters() {

403 pending_empty_ = false;	406 pending_empty_ = false;

404 if (characters_ != NULL) {	407 if (characters_ != NULL) {

405 RegExpTree* atom = new RegExpAtom(characters_->ToConstVector());	408 RegExpTree* atom = new RegExpAtom(characters_->ToConstVector());

(...skipping 181 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
587 bool simple();	590 bool simple();

588 bool contains_anchor() { return contains_anchor_; }	591 bool contains_anchor() { return contains_anchor_; }

589 void set_contains_anchor() { contains_anchor_ = true; }	592 void set_contains_anchor() { contains_anchor_ = true; }

590 int captures_started() { return captures_ == NULL ? 0 : captures_->length(); }	593 int captures_started() { return captures_ == NULL ? 0 : captures_->length(); }

591 int position() { return next_pos_ - 1; }	594 int position() { return next_pos_ - 1; }

592 bool failed() { return failed_; }	595 bool failed() { return failed_; }

593	596

594 static const int kMaxCaptures = 1 << 16;	597 static const int kMaxCaptures = 1 << 16;

595 static const uc32 kEndMarker = (1 << 21);	598 static const uc32 kEndMarker = (1 << 21);

596 private:	599 private:

	600 enum SubexpressionType {

	601 INITIAL,

	602 CAPTURE, // All positive values represent captures.

	603 POSITIVE_LOOKAHEAD,

	604 NEGATIVE_LOOKAHEAD,

	605 GROUPING

	606 };

	607

	608 struct RegExpParserState : public ZoneObject {
	Erik Corry 2009/07/01 11:29:14 Please make this a real object. Please make this a real object.
	609 RegExpParserState(RegExpParserState* previous_state,

	610 RegExpBuilder* builder,

	611 int disjunction_capture_index,

	612 SubexpressionType group_type)

	613 : previous_state(previous_state),

	614 builder(builder),

	615 disjunction_capture_index(disjunction_capture_index),

	616 group_type(group_type) {}

	617 // Linked list implementation of stack of states.

	618 RegExpParserState* previous_state;

	619 // Builder for the stored disjunction.

	620 RegExpBuilder* builder;

	621 // Stored disjunction's capture index (if any).

	622 int disjunction_capture_index;

	623 // Stored disjunction type (capture, look-ahead or grouping), if any.

	624 SubexpressionType group_type;

	625 };

597	626

598 uc32 current() { return current_; }	627 uc32 current() { return current_; }

599 bool has_more() { return has_more_; }	628 bool has_more() { return has_more_; }

600 bool has_next() { return next_pos_ < in()->length(); }	629 bool has_next() { return next_pos_ < in()->length(); }

601 uc32 Next();	630 uc32 Next();

602 FlatStringReader* in() { return in_; }	631 FlatStringReader* in() { return in_; }

603 void ScanForCaptures();	632 void ScanForCaptures();

604 bool CaptureAvailable(int index);

605 uc32 current_;	633 uc32 current_;

606 bool has_more_;	634 bool has_more_;

607 bool multiline_;	635 bool multiline_;

608 int next_pos_;	636 int next_pos_;

609 FlatStringReader* in_;	637 FlatStringReader* in_;

610 Handle<String>* error_;	638 Handle<String>* error_;

611 bool simple_;	639 bool simple_;

612 bool contains_anchor_;	640 bool contains_anchor_;

613 ZoneList<RegExpCapture> captures_;	641 ZoneList<RegExpCapture> captures_;

614 bool is_scanned_for_captures_;	642 bool is_scanned_for_captures_;

(...skipping 3198 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3813 }	3841 }

3814 // If the result of parsing is a literal string atom, and it has the	3842 // If the result of parsing is a literal string atom, and it has the

3815 // same length as the input, then the atom is identical to the input.	3843 // same length as the input, then the atom is identical to the input.

3816 if (result->IsAtom() && result->AsAtom()->length() == in()->length()) {	3844 if (result->IsAtom() && result->AsAtom()->length() == in()->length()) {

3817 simple_ = true;	3845 simple_ = true;

3818 }	3846 }

3819 return result;	3847 return result;

3820 }	3848 }

3821	3849

3822	3850

3823 bool RegExpParser::CaptureAvailable(int index) {

3824 if (captures_ == NULL) return false;

3825 if (index >= captures_->length()) return false;

3826 RegExpCapture* capture = captures_->at(index);

3827 return capture != NULL && capture->available() == CAPTURE_AVAILABLE;

3828 }

3829

3830

3831 // Disjunction ::	3851 // Disjunction ::

3832 // Alternative	3852 // Alternative

3833 // Alternative \| Disjunction	3853 // Alternative \| Disjunction

3834 // Alternative ::	3854 // Alternative ::

3835 // [empty]	3855 // [empty]

3836 // Term Alternative	3856 // Term Alternative

3837 // Term ::	3857 // Term ::

3838 // Assertion	3858 // Assertion

3839 // Atom	3859 // Atom

3840 // Atom Quantifier	3860 // Atom Quantifier

3841 RegExpTree* RegExpParser::ParseDisjunction() {	3861 RegExpTree* RegExpParser::ParseDisjunction() {

3842 RegExpBuilder builder;	3862 // Used to store current state while parsing subexpressions.

3843 int capture_start_index = captures_started();	3863 RegExpParserState* stored_state = NULL;
	Erik Corry 2009/07/01 11:29:14 If you initialize this to an initial stored state If you initialize this to an initial stored state instead then you can have the group_type live inside the stored state. Also the disjunction_capture_index.
	3864 RegExpBuilder* builder = new RegExpBuilder();

	3865 SubexpressionType group_type = INITIAL;

	3866 // Index in captures array of first capture in this sub-expression, if any.

	3867 // Also the capture index of this sub-expression itself, if group_type

	3868 // is CAPTURE.

	3869 int disjunction_capture_index = 0;

3844 while (true) {	3870 while (true) {

3845 switch (current()) {	3871 switch (current()) {

3846 case kEndMarker:	3872 case kEndMarker:

3847 case ')':	3873 if (stored_state != NULL) {

3848 return builder.ToRegExp();	3874 // Inside a parenthesized group when hitting end of input.

	3875 ReportError(CStrVector("Unterminated group") CHECK_FAILED);

	3876 }

	3877 ASSERT_EQ(INITIAL, group_type);

	3878 // Parsing completed successfully.

	3879 return builder->ToRegExp();

	3880 case ')': {

	3881 if (stored_state == NULL) {

	3882 ReportError(CStrVector("Unexpected ')'") CHECK_FAILED);

	3883 }

	3884 ASSERT_NE(INITIAL, group_type);

	3885

	3886 Advance();

	3887 // End disjunction parsing and convert builder content to new single

	3888 // regexp atom.

	3889 RegExpTree* body = builder->ToRegExp();

	3890

	3891 int end_capture_index = captures_started();

	3892

	3893 int capture_index = disjunction_capture_index;

	3894 SubexpressionType type = group_type;

	3895

	3896 // Restore previous state.

	3897 builder = stored_state->builder;

	3898 group_type = stored_state->group_type;

	3899 disjunction_capture_index = stored_state->disjunction_capture_index;

	3900 stored_state = stored_state->previous_state;

	3901

	3902 // Build result of subexpression.

	3903 if (type == CAPTURE) {

	3904 RegExpCapture* capture = new RegExpCapture(body, capture_index);

	3905 captures_->at(capture_index - 1) = capture;

	3906 body = capture;

	3907 } else if (type != GROUPING) {

	3908 ASSERT(type == POSITIVE_LOOKAHEAD \|\| type == NEGATIVE_LOOKAHEAD);

	3909 bool is_positive = (type == POSITIVE_LOOKAHEAD);

	3910 body = new RegExpLookahead(body,

	3911 is_positive,

	3912 end_capture_index - capture_index,

	3913 capture_index);

	3914 }

	3915 builder->AddAtom(body);

	3916 break;

	3917 }

3849 case '\|': {	3918 case '\|': {

3850 Advance();	3919 Advance();

3851 builder.NewAlternative();	3920 builder->NewAlternative();

3852 int capture_new_alt_start_index = captures_started();

3853 for (int i = capture_start_index; i < capture_new_alt_start_index; i++) {

3854 RegExpCapture* capture = captures_->at(i);

3855 if (capture->available() == CAPTURE_AVAILABLE) {

3856 capture->set_available(CAPTURE_UNREACHABLE);

3857 }

3858 }

3859 capture_start_index = capture_new_alt_start_index;

3860 continue;	3921 continue;

3861 }	3922 }

3862 case '*':	3923 case '*':

3863 case '+':	3924 case '+':

3864 case '?':	3925 case '?':

3865 ReportError(CStrVector("Nothing to repeat") CHECK_FAILED);	3926 ReportError(CStrVector("Nothing to repeat") CHECK_FAILED);

3866 case '^': {	3927 case '^': {

3867 Advance();	3928 Advance();

3868 if (multiline_) {	3929 if (multiline_) {

3869 builder.AddAssertion(	3930 builder->AddAssertion(

3870 new RegExpAssertion(RegExpAssertion::START_OF_LINE));	3931 new RegExpAssertion(RegExpAssertion::START_OF_LINE));

3871 } else {	3932 } else {

3872 builder.AddAssertion(	3933 builder->AddAssertion(

3873 new RegExpAssertion(RegExpAssertion::START_OF_INPUT));	3934 new RegExpAssertion(RegExpAssertion::START_OF_INPUT));

3874 set_contains_anchor();	3935 set_contains_anchor();

3875 }	3936 }

3876 continue;	3937 continue;

3877 }	3938 }

3878 case '$': {	3939 case '$': {

3879 Advance();	3940 Advance();

3880 RegExpAssertion::Type type =	3941 RegExpAssertion::Type type =

3881 multiline_ ? RegExpAssertion::END_OF_LINE :	3942 multiline_ ? RegExpAssertion::END_OF_LINE :

3882 RegExpAssertion::END_OF_INPUT;	3943 RegExpAssertion::END_OF_INPUT;

3883 builder.AddAssertion(new RegExpAssertion(type));	3944 builder->AddAssertion(new RegExpAssertion(type));

3884 continue;	3945 continue;

3885 }	3946 }

3886 case '.': {	3947 case '.': {

3887 Advance();	3948 Advance();

3888 // everything except \x0a, \x0d, \u2028 and \u2029	3949 // everything except \x0a, \x0d, \u2028 and \u2029

3889 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2);	3950 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2);

3890 CharacterRange::AddClassEscape('.', ranges);	3951 CharacterRange::AddClassEscape('.', ranges);

3891 RegExpTree* atom = new RegExpCharacterClass(ranges, false);	3952 RegExpTree* atom = new RegExpCharacterClass(ranges, false);

3892 builder.AddAtom(atom);	3953 builder->AddAtom(atom);

3893 break;	3954 break;

3894 }	3955 }

3895 case '(': {	3956 case '(': {

3896 RegExpTree* atom = ParseGroup(CHECK_FAILED);	3957 SubexpressionType type = CAPTURE;

3897 builder.AddAtom(atom);	3958 Advance();

	3959 if (current() == '?') {

	3960 switch (Next()) {

	3961 case ':':

	3962 type = GROUPING;

	3963 break;

	3964 case '=':

	3965 type = POSITIVE_LOOKAHEAD;

	3966 break;

	3967 case '!':

	3968 type = NEGATIVE_LOOKAHEAD;

	3969 break;

	3970 default:

	3971 ReportError(CStrVector("Invalid group") CHECK_FAILED);

	3972 break;

	3973 }

	3974 Advance(2);

	3975 } else {

	3976 if (captures_ == NULL) {

	3977 captures_ = new ZoneList<RegExpCapture*>(2);

	3978 }

	3979 if (captures_started() >= kMaxCaptures) {

	3980 ReportError(CStrVector("Too many captures") CHECK_FAILED);

	3981 }

	3982 captures_->Add(NULL);

	3983 }

	3984 // Store current state and begin new disjunction parsing.

	3985 stored_state = new RegExpParserState(stored_state,

	3986 builder,

	3987 disjunction_capture_index,

	3988 group_type);

	3989 builder = new RegExpBuilder();

	3990 group_type = type;

	3991 if (type == CAPTURE) {

	3992 disjunction_capture_index = captures_started();

	3993 }

3898 break;	3994 break;

3899 }	3995 }

3900 case '[': {	3996 case '[': {

3901 RegExpTree* atom = ParseCharacterClass(CHECK_FAILED);	3997 RegExpTree* atom = ParseCharacterClass(CHECK_FAILED);

3902 builder.AddAtom(atom);	3998 builder->AddAtom(atom);

3903 break;	3999 break;

3904 }	4000 }

3905 // Atom ::	4001 // Atom ::

3906 // \ AtomEscape	4002 // \ AtomEscape

3907 case '\\':	4003 case '\\':

3908 switch (Next()) {	4004 switch (Next()) {

3909 case kEndMarker:	4005 case kEndMarker:

3910 ReportError(CStrVector("\\ at end of pattern") CHECK_FAILED);	4006 ReportError(CStrVector("\\ at end of pattern") CHECK_FAILED);

3911 case 'b':	4007 case 'b':

3912 Advance(2);	4008 Advance(2);

3913 builder.AddAssertion(	4009 builder->AddAssertion(

3914 new RegExpAssertion(RegExpAssertion::BOUNDARY));	4010 new RegExpAssertion(RegExpAssertion::BOUNDARY));

3915 continue;	4011 continue;

3916 case 'B':	4012 case 'B':

3917 Advance(2);	4013 Advance(2);

3918 builder.AddAssertion(	4014 builder->AddAssertion(

3919 new RegExpAssertion(RegExpAssertion::NON_BOUNDARY));	4015 new RegExpAssertion(RegExpAssertion::NON_BOUNDARY));

3920 continue;	4016 continue;

3921 // AtomEscape ::	4017 // AtomEscape ::

3922 // CharacterClassEscape	4018 // CharacterClassEscape

3923 //	4019 //

3924 // CharacterClassEscape :: one of	4020 // CharacterClassEscape :: one of

3925 // d D s S w W	4021 // d D s S w W

3926 case 'd': case 'D': case 's': case 'S': case 'w': case 'W': {	4022 case 'd': case 'D': case 's': case 'S': case 'w': case 'W': {

3927 uc32 c = Next();	4023 uc32 c = Next();

3928 Advance(2);	4024 Advance(2);

3929 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2);	4025 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2);

3930 CharacterRange::AddClassEscape(c, ranges);	4026 CharacterRange::AddClassEscape(c, ranges);

3931 RegExpTree* atom = new RegExpCharacterClass(ranges, false);	4027 RegExpTree* atom = new RegExpCharacterClass(ranges, false);

3932 builder.AddAtom(atom);	4028 builder->AddAtom(atom);

3933 goto has_read_atom; // Avoid setting has_character_escapes_.	4029 goto has_read_atom; // Avoid setting has_character_escapes_.

3934 }	4030 }

3935 case '1': case '2': case '3': case '4': case '5': case '6':	4031 case '1': case '2': case '3': case '4': case '5': case '6':

3936 case '7': case '8': case '9': {	4032 case '7': case '8': case '9': {

3937 int index = 0;	4033 int index = 0;

3938 if (ParseBackReferenceIndex(&index)) {	4034 if (ParseBackReferenceIndex(&index)) {

3939 if (!CaptureAvailable(index - 1)) {	4035 RegExpCapture* capture = NULL;

3940 // Prepare to ignore a following quantifier	4036 if (captures_ != NULL && index <= captures_->length()) {

3941 builder.AddEmpty();	4037 capture = captures_->at(index - 1);

	4038 }

	4039 if (capture == NULL) {

	4040 builder->AddEmpty();

3942 goto has_read_atom;	4041 goto has_read_atom;

3943 }	4042 }

3944 RegExpCapture* capture = captures_->at(index - 1);

3945 RegExpTree* atom = new RegExpBackReference(capture);	4043 RegExpTree* atom = new RegExpBackReference(capture);

3946 builder.AddAtom(atom);	4044 builder->AddAtom(atom);

3947 goto has_read_atom; // Avoid setting has_character_escapes_.	4045 goto has_read_atom; // Avoid setting has_character_escapes_.

3948 }	4046 }

3949 uc32 first_digit = Next();	4047 uc32 first_digit = Next();

3950 if (first_digit == '8' \|\| first_digit == '9') {	4048 if (first_digit == '8' \|\| first_digit == '9') {

3951 // Treat as identity escape	4049 // Treat as identity escape

3952 builder.AddCharacter(first_digit);	4050 builder->AddCharacter(first_digit);

3953 Advance(2);	4051 Advance(2);

3954 break;	4052 break;

3955 }	4053 }

3956 }	4054 }

3957 // FALLTHROUGH	4055 // FALLTHROUGH

3958 case '0': {	4056 case '0': {

3959 Advance();	4057 Advance();

3960 uc32 octal = ParseOctalLiteral();	4058 uc32 octal = ParseOctalLiteral();

3961 builder.AddCharacter(octal);	4059 builder->AddCharacter(octal);

3962 break;	4060 break;

3963 }	4061 }

3964 // ControlEscape :: one of	4062 // ControlEscape :: one of

3965 // f n r t v	4063 // f n r t v

3966 case 'f':	4064 case 'f':

3967 Advance(2);	4065 Advance(2);

3968 builder.AddCharacter('\f');	4066 builder->AddCharacter('\f');

3969 break;	4067 break;

3970 case 'n':	4068 case 'n':

3971 Advance(2);	4069 Advance(2);

3972 builder.AddCharacter('\n');	4070 builder->AddCharacter('\n');

3973 break;	4071 break;

3974 case 'r':	4072 case 'r':

3975 Advance(2);	4073 Advance(2);

3976 builder.AddCharacter('\r');	4074 builder->AddCharacter('\r');

3977 break;	4075 break;

3978 case 't':	4076 case 't':

3979 Advance(2);	4077 Advance(2);

3980 builder.AddCharacter('\t');	4078 builder->AddCharacter('\t');

3981 break;	4079 break;

3982 case 'v':	4080 case 'v':

3983 Advance(2);	4081 Advance(2);

3984 builder.AddCharacter('\v');	4082 builder->AddCharacter('\v');

3985 break;	4083 break;

3986 case 'c': {	4084 case 'c': {

3987 Advance(2);	4085 Advance(2);

3988 uc32 control = ParseControlLetterEscape();	4086 uc32 control = ParseControlLetterEscape();

3989 builder.AddCharacter(control);	4087 builder->AddCharacter(control);

3990 break;	4088 break;

3991 }	4089 }

3992 case 'x': {	4090 case 'x': {

3993 Advance(2);	4091 Advance(2);

3994 uc32 value;	4092 uc32 value;

3995 if (ParseHexEscape(2, &value)) {	4093 if (ParseHexEscape(2, &value)) {

3996 builder.AddCharacter(value);	4094 builder->AddCharacter(value);

3997 } else {	4095 } else {

3998 builder.AddCharacter('x');	4096 builder->AddCharacter('x');

3999 }	4097 }

4000 break;	4098 break;

4001 }	4099 }

4002 case 'u': {	4100 case 'u': {

4003 Advance(2);	4101 Advance(2);

4004 uc32 value;	4102 uc32 value;

4005 if (ParseHexEscape(4, &value)) {	4103 if (ParseHexEscape(4, &value)) {

4006 builder.AddCharacter(value);	4104 builder->AddCharacter(value);

4007 } else {	4105 } else {

4008 builder.AddCharacter('u');	4106 builder->AddCharacter('u');

4009 }	4107 }

4010 break;	4108 break;

4011 }	4109 }

4012 default:	4110 default:

4013 // Identity escape.	4111 // Identity escape.

4014 builder.AddCharacter(Next());	4112 builder->AddCharacter(Next());

4015 Advance(2);	4113 Advance(2);

4016 break;	4114 break;

4017 }	4115 }

4018 break;	4116 break;

4019 case '{': {	4117 case '{': {

4020 int dummy;	4118 int dummy;

4021 if (ParseIntervalQuantifier(&dummy, &dummy)) {	4119 if (ParseIntervalQuantifier(&dummy, &dummy)) {

4022 ReportError(CStrVector("Nothing to repeat") CHECK_FAILED);	4120 ReportError(CStrVector("Nothing to repeat") CHECK_FAILED);

4023 }	4121 }

4024 // fallthrough	4122 // fallthrough

4025 }	4123 }

4026 default:	4124 default:

4027 builder.AddCharacter(current());	4125 builder->AddCharacter(current());

4028 Advance();	4126 Advance();

4029 break;	4127 break;

4030 } // end switch(current())	4128 } // end switch(current())

4031	4129

4032 has_read_atom:	4130 has_read_atom:

4033 int min;	4131 int min;

4034 int max;	4132 int max;

4035 switch (current()) {	4133 switch (current()) {

4036 // QuantifierPrefix ::	4134 // QuantifierPrefix ::

4037 // *	4135 // *

(...skipping 26 matching lines...) Expand all Loading...
4064 continue;	4162 continue;

4065 }	4163 }

4066 default:	4164 default:

4067 continue;	4165 continue;

4068 }	4166 }

4069 bool is_greedy = true;	4167 bool is_greedy = true;

4070 if (current() == '?') {	4168 if (current() == '?') {

4071 is_greedy = false;	4169 is_greedy = false;

4072 Advance();	4170 Advance();

4073 }	4171 }

4074 builder.AddQuantifierToAtom(min, max, is_greedy);	4172 builder->AddQuantifierToAtom(min, max, is_greedy);

4075 }	4173 }

4076 }	4174 }

4077	4175

4078 class SourceCharacter {	4176 class SourceCharacter {

4079 public:	4177 public:

4080 static bool Is(uc32 c) {	4178 static bool Is(uc32 c) {

4081 switch (c) {	4179 switch (c) {

4082 // case ']': case '}':	4180 // case ']': case '}':

4083 // In spidermonkey and jsc these are treated as source characters	4181 // In spidermonkey and jsc these are treated as source characters

4084 // so we do too.	4182 // so we do too.

(...skipping 290 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4375 // by the ECMAScript specification.	4473 // by the ECMAScript specification.

4376 uc32 result = current();	4474 uc32 result = current();

4377 Advance();	4475 Advance();

4378 return result;	4476 return result;

4379 }	4477 }

4380 }	4478 }

4381 return 0;	4479 return 0;

4382 }	4480 }

4383	4481

4384	4482

4385 RegExpTree* RegExpParser::ParseGroup() {

4386 ASSERT_EQ(current(), '(');

4387 char type = '(';

4388 Advance();

4389 if (current() == '?') {

4390 switch (Next()) {

4391 case ':': case '=': case '!':

4392 type = Next();

4393 Advance(2);

4394 break;

4395 default:

4396 ReportError(CStrVector("Invalid group") CHECK_FAILED);

4397 break;

4398 }

4399 } else {

4400 if (captures_ == NULL) {

4401 captures_ = new ZoneList<RegExpCapture*>(2);

4402 }

4403 if (captures_started() >= kMaxCaptures) {

4404 ReportError(CStrVector("Too many captures") CHECK_FAILED);

4405 }

4406 captures_->Add(NULL);

4407 }

4408 int capture_index = captures_started();

4409 RegExpTree* body = ParseDisjunction(CHECK_FAILED);

4410 if (current() != ')') {

4411 ReportError(CStrVector("Unterminated group") CHECK_FAILED);

4412 }

4413 Advance();

4414

4415 int end_capture_index = captures_started();

4416 if (type == '!') {

4417 // Captures inside a negative lookahead are never available outside it.

4418 for (int i = capture_index; i < end_capture_index; i++) {

4419 RegExpCapture* capture = captures_->at(i);

4420 ASSERT(capture != NULL);

4421 capture->set_available(CAPTURE_PERMANENTLY_UNREACHABLE);

4422 }

4423 } else {

4424 // Captures temporarily unavailable because they are in different

4425 // alternatives are all available after the disjunction.

4426 for (int i = capture_index; i < end_capture_index; i++) {

4427 RegExpCapture* capture = captures_->at(i);

4428 ASSERT(capture != NULL);

4429 if (capture->available() == CAPTURE_UNREACHABLE) {

4430 capture->set_available(CAPTURE_AVAILABLE);

4431 }

4432 }

4433 }

4434

4435 if (type == '(') {

4436 RegExpCapture* capture = new RegExpCapture(body, capture_index);

4437 captures_->at(capture_index - 1) = capture;

4438 return capture;

4439 } else if (type == ':') {

4440 return body;

4441 } else {

4442 ASSERT(type == '=' \|\| type == '!');

4443 bool is_positive = (type == '=');

4444 return new RegExpLookahead(body,

4445 is_positive,

4446 end_capture_index - capture_index,

4447 capture_index);

4448 }

4449 }

4450

4451

4452 CharacterRange RegExpParser::ParseClassAtom(uc16* char_class) {	4483 CharacterRange RegExpParser::ParseClassAtom(uc16* char_class) {

4453 ASSERT_EQ(0, *char_class);	4484 ASSERT_EQ(0, *char_class);

4454 uc32 first = current();	4485 uc32 first = current();

4455 if (first == '\\') {	4486 if (first == '\\') {

4456 switch (Next()) {	4487 switch (Next()) {

4457 case 'w': case 'W': case 'd': case 'D': case 's': case 'S': {	4488 case 'w': case 'W': case 'd': case 'D': case 's': case 'S': {

4458 *char_class = Next();	4489 *char_class = Next();

4459 Advance(2);	4490 Advance(2);

4460 return CharacterRange::Singleton(0); // Return dummy value.	4491 return CharacterRange::Singleton(0); // Return dummy value.

4461 }	4492 }

(...skipping 184 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4646 start_position,	4677 start_position,

4647 is_expression);	4678 is_expression);

4648 return result;	4679 return result;

4649 }	4680 }

4650	4681

4651	4682

4652 #undef NEW	4683 #undef NEW

4653	4684

4654	4685

4655 } } // namespace v8::internal	4686 } } // namespace v8::internal

OLD	NEW

« no previous file with comments | « src/ast.h ('k') | test/cctest/test-regexp.cc » ('j') | no next file with comments »