Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(221)

Side by Side Diff: src/parser.cc

Issue 149069: Changed RegExp parser to use a recursive data structure instead of stack-based recursion. (Closed)
Patch Set: Removed static nonparticipating capture optimization. Not worth the complexity. Created 11 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/ast.h ('k') | test/cctest/test-regexp.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. 1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 343 matching lines...) Expand 10 before | Expand all | Expand 10 after
354 } 354 }
355 return list_; 355 return list_;
356 } 356 }
357 357
358 private: 358 private:
359 ZoneList<T*>* list_; 359 ZoneList<T*>* list_;
360 T* last_; 360 T* last_;
361 }; 361 };
362 362
363 // Accumulates RegExp atoms and assertions into lists of terms and alternatives. 363 // Accumulates RegExp atoms and assertions into lists of terms and alternatives.
364 class RegExpBuilder { 364 class RegExpBuilder: public ZoneObject {
365 public: 365 public:
366 RegExpBuilder(); 366 RegExpBuilder();
367 void AddCharacter(uc16 character); 367 void AddCharacter(uc16 character);
368 // "Adds" an empty expression. Does nothing except consume a 368 // "Adds" an empty expression. Does nothing except consume a
369 // following quantifier 369 // following quantifier
370 void AddEmpty(); 370 void AddEmpty();
371 void AddAtom(RegExpTree* tree); 371 void AddAtom(RegExpTree* tree);
372 void AddAssertion(RegExpTree* tree); 372 void AddAssertion(RegExpTree* tree);
373 void NewAlternative(); // '|' 373 void NewAlternative(); // '|'
374 void AddQuantifierToAtom(int min, int max, bool is_greedy); 374 void AddQuantifierToAtom(int min, int max, bool is_greedy);
(...skipping 10 matching lines...) Expand all
385 #ifdef DEBUG 385 #ifdef DEBUG
386 enum {ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM} last_added_; 386 enum {ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM} last_added_;
387 #define LAST(x) last_added_ = x; 387 #define LAST(x) last_added_ = x;
388 #else 388 #else
389 #define LAST(x) 389 #define LAST(x)
390 #endif 390 #endif
391 }; 391 };
392 392
393 393
394 RegExpBuilder::RegExpBuilder() 394 RegExpBuilder::RegExpBuilder()
395 : pending_empty_(false), characters_(NULL), terms_(), alternatives_() 395 : pending_empty_(false),
396 characters_(NULL),
397 terms_(),
398 alternatives_()
396 #ifdef DEBUG 399 #ifdef DEBUG
397 , last_added_(ADD_NONE) 400 , last_added_(ADD_NONE)
398 #endif 401 #endif
399 {} 402 {}
400 403
401 404
402 void RegExpBuilder::FlushCharacters() { 405 void RegExpBuilder::FlushCharacters() {
403 pending_empty_ = false; 406 pending_empty_ = false;
404 if (characters_ != NULL) { 407 if (characters_ != NULL) {
405 RegExpTree* atom = new RegExpAtom(characters_->ToConstVector()); 408 RegExpTree* atom = new RegExpAtom(characters_->ToConstVector());
(...skipping 181 matching lines...) Expand 10 before | Expand all | Expand 10 after
587 bool simple(); 590 bool simple();
588 bool contains_anchor() { return contains_anchor_; } 591 bool contains_anchor() { return contains_anchor_; }
589 void set_contains_anchor() { contains_anchor_ = true; } 592 void set_contains_anchor() { contains_anchor_ = true; }
590 int captures_started() { return captures_ == NULL ? 0 : captures_->length(); } 593 int captures_started() { return captures_ == NULL ? 0 : captures_->length(); }
591 int position() { return next_pos_ - 1; } 594 int position() { return next_pos_ - 1; }
592 bool failed() { return failed_; } 595 bool failed() { return failed_; }
593 596
594 static const int kMaxCaptures = 1 << 16; 597 static const int kMaxCaptures = 1 << 16;
595 static const uc32 kEndMarker = (1 << 21); 598 static const uc32 kEndMarker = (1 << 21);
596 private: 599 private:
600 enum SubexpressionType {
601 INITIAL,
602 CAPTURE, // All positive values represent captures.
603 POSITIVE_LOOKAHEAD,
604 NEGATIVE_LOOKAHEAD,
605 GROUPING
606 };
607
608 struct RegExpParserState : public ZoneObject {
Erik Corry 2009/07/01 11:29:14 Please make this a real object.
609 RegExpParserState(RegExpParserState* previous_state,
610 RegExpBuilder* builder,
611 int disjunction_capture_index,
612 SubexpressionType group_type)
613 : previous_state(previous_state),
614 builder(builder),
615 disjunction_capture_index(disjunction_capture_index),
616 group_type(group_type) {}
617 // Linked list implementation of stack of states.
618 RegExpParserState* previous_state;
619 // Builder for the stored disjunction.
620 RegExpBuilder* builder;
621 // Stored disjunction's capture index (if any).
622 int disjunction_capture_index;
623 // Stored disjunction type (capture, look-ahead or grouping), if any.
624 SubexpressionType group_type;
625 };
597 626
598 uc32 current() { return current_; } 627 uc32 current() { return current_; }
599 bool has_more() { return has_more_; } 628 bool has_more() { return has_more_; }
600 bool has_next() { return next_pos_ < in()->length(); } 629 bool has_next() { return next_pos_ < in()->length(); }
601 uc32 Next(); 630 uc32 Next();
602 FlatStringReader* in() { return in_; } 631 FlatStringReader* in() { return in_; }
603 void ScanForCaptures(); 632 void ScanForCaptures();
604 bool CaptureAvailable(int index);
605 uc32 current_; 633 uc32 current_;
606 bool has_more_; 634 bool has_more_;
607 bool multiline_; 635 bool multiline_;
608 int next_pos_; 636 int next_pos_;
609 FlatStringReader* in_; 637 FlatStringReader* in_;
610 Handle<String>* error_; 638 Handle<String>* error_;
611 bool simple_; 639 bool simple_;
612 bool contains_anchor_; 640 bool contains_anchor_;
613 ZoneList<RegExpCapture*>* captures_; 641 ZoneList<RegExpCapture*>* captures_;
614 bool is_scanned_for_captures_; 642 bool is_scanned_for_captures_;
(...skipping 3198 matching lines...) Expand 10 before | Expand all | Expand 10 after
3813 } 3841 }
3814 // If the result of parsing is a literal string atom, and it has the 3842 // If the result of parsing is a literal string atom, and it has the
3815 // same length as the input, then the atom is identical to the input. 3843 // same length as the input, then the atom is identical to the input.
3816 if (result->IsAtom() && result->AsAtom()->length() == in()->length()) { 3844 if (result->IsAtom() && result->AsAtom()->length() == in()->length()) {
3817 simple_ = true; 3845 simple_ = true;
3818 } 3846 }
3819 return result; 3847 return result;
3820 } 3848 }
3821 3849
3822 3850
3823 bool RegExpParser::CaptureAvailable(int index) {
3824 if (captures_ == NULL) return false;
3825 if (index >= captures_->length()) return false;
3826 RegExpCapture* capture = captures_->at(index);
3827 return capture != NULL && capture->available() == CAPTURE_AVAILABLE;
3828 }
3829
3830
3831 // Disjunction :: 3851 // Disjunction ::
3832 // Alternative 3852 // Alternative
3833 // Alternative | Disjunction 3853 // Alternative | Disjunction
3834 // Alternative :: 3854 // Alternative ::
3835 // [empty] 3855 // [empty]
3836 // Term Alternative 3856 // Term Alternative
3837 // Term :: 3857 // Term ::
3838 // Assertion 3858 // Assertion
3839 // Atom 3859 // Atom
3840 // Atom Quantifier 3860 // Atom Quantifier
3841 RegExpTree* RegExpParser::ParseDisjunction() { 3861 RegExpTree* RegExpParser::ParseDisjunction() {
3842 RegExpBuilder builder; 3862 // Used to store current state while parsing subexpressions.
3843 int capture_start_index = captures_started(); 3863 RegExpParserState* stored_state = NULL;
Erik Corry 2009/07/01 11:29:14 If you initialize this to an initial stored state
3864 RegExpBuilder* builder = new RegExpBuilder();
3865 SubexpressionType group_type = INITIAL;
3866 // Index in captures array of first capture in this sub-expression, if any.
3867 // Also the capture index of this sub-expression itself, if group_type
3868 // is CAPTURE.
3869 int disjunction_capture_index = 0;
3844 while (true) { 3870 while (true) {
3845 switch (current()) { 3871 switch (current()) {
3846 case kEndMarker: 3872 case kEndMarker:
3847 case ')': 3873 if (stored_state != NULL) {
3848 return builder.ToRegExp(); 3874 // Inside a parenthesized group when hitting end of input.
3875 ReportError(CStrVector("Unterminated group") CHECK_FAILED);
3876 }
3877 ASSERT_EQ(INITIAL, group_type);
3878 // Parsing completed successfully.
3879 return builder->ToRegExp();
3880 case ')': {
3881 if (stored_state == NULL) {
3882 ReportError(CStrVector("Unexpected ')'") CHECK_FAILED);
3883 }
3884 ASSERT_NE(INITIAL, group_type);
3885
3886 Advance();
3887 // End disjunction parsing and convert builder content to new single
3888 // regexp atom.
3889 RegExpTree* body = builder->ToRegExp();
3890
3891 int end_capture_index = captures_started();
3892
3893 int capture_index = disjunction_capture_index;
3894 SubexpressionType type = group_type;
3895
3896 // Restore previous state.
3897 builder = stored_state->builder;
3898 group_type = stored_state->group_type;
3899 disjunction_capture_index = stored_state->disjunction_capture_index;
3900 stored_state = stored_state->previous_state;
3901
3902 // Build result of subexpression.
3903 if (type == CAPTURE) {
3904 RegExpCapture* capture = new RegExpCapture(body, capture_index);
3905 captures_->at(capture_index - 1) = capture;
3906 body = capture;
3907 } else if (type != GROUPING) {
3908 ASSERT(type == POSITIVE_LOOKAHEAD || type == NEGATIVE_LOOKAHEAD);
3909 bool is_positive = (type == POSITIVE_LOOKAHEAD);
3910 body = new RegExpLookahead(body,
3911 is_positive,
3912 end_capture_index - capture_index,
3913 capture_index);
3914 }
3915 builder->AddAtom(body);
3916 break;
3917 }
3849 case '|': { 3918 case '|': {
3850 Advance(); 3919 Advance();
3851 builder.NewAlternative(); 3920 builder->NewAlternative();
3852 int capture_new_alt_start_index = captures_started();
3853 for (int i = capture_start_index; i < capture_new_alt_start_index; i++) {
3854 RegExpCapture* capture = captures_->at(i);
3855 if (capture->available() == CAPTURE_AVAILABLE) {
3856 capture->set_available(CAPTURE_UNREACHABLE);
3857 }
3858 }
3859 capture_start_index = capture_new_alt_start_index;
3860 continue; 3921 continue;
3861 } 3922 }
3862 case '*': 3923 case '*':
3863 case '+': 3924 case '+':
3864 case '?': 3925 case '?':
3865 ReportError(CStrVector("Nothing to repeat") CHECK_FAILED); 3926 ReportError(CStrVector("Nothing to repeat") CHECK_FAILED);
3866 case '^': { 3927 case '^': {
3867 Advance(); 3928 Advance();
3868 if (multiline_) { 3929 if (multiline_) {
3869 builder.AddAssertion( 3930 builder->AddAssertion(
3870 new RegExpAssertion(RegExpAssertion::START_OF_LINE)); 3931 new RegExpAssertion(RegExpAssertion::START_OF_LINE));
3871 } else { 3932 } else {
3872 builder.AddAssertion( 3933 builder->AddAssertion(
3873 new RegExpAssertion(RegExpAssertion::START_OF_INPUT)); 3934 new RegExpAssertion(RegExpAssertion::START_OF_INPUT));
3874 set_contains_anchor(); 3935 set_contains_anchor();
3875 } 3936 }
3876 continue; 3937 continue;
3877 } 3938 }
3878 case '$': { 3939 case '$': {
3879 Advance(); 3940 Advance();
3880 RegExpAssertion::Type type = 3941 RegExpAssertion::Type type =
3881 multiline_ ? RegExpAssertion::END_OF_LINE : 3942 multiline_ ? RegExpAssertion::END_OF_LINE :
3882 RegExpAssertion::END_OF_INPUT; 3943 RegExpAssertion::END_OF_INPUT;
3883 builder.AddAssertion(new RegExpAssertion(type)); 3944 builder->AddAssertion(new RegExpAssertion(type));
3884 continue; 3945 continue;
3885 } 3946 }
3886 case '.': { 3947 case '.': {
3887 Advance(); 3948 Advance();
3888 // everything except \x0a, \x0d, \u2028 and \u2029 3949 // everything except \x0a, \x0d, \u2028 and \u2029
3889 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2); 3950 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2);
3890 CharacterRange::AddClassEscape('.', ranges); 3951 CharacterRange::AddClassEscape('.', ranges);
3891 RegExpTree* atom = new RegExpCharacterClass(ranges, false); 3952 RegExpTree* atom = new RegExpCharacterClass(ranges, false);
3892 builder.AddAtom(atom); 3953 builder->AddAtom(atom);
3893 break; 3954 break;
3894 } 3955 }
3895 case '(': { 3956 case '(': {
3896 RegExpTree* atom = ParseGroup(CHECK_FAILED); 3957 SubexpressionType type = CAPTURE;
3897 builder.AddAtom(atom); 3958 Advance();
3959 if (current() == '?') {
3960 switch (Next()) {
3961 case ':':
3962 type = GROUPING;
3963 break;
3964 case '=':
3965 type = POSITIVE_LOOKAHEAD;
3966 break;
3967 case '!':
3968 type = NEGATIVE_LOOKAHEAD;
3969 break;
3970 default:
3971 ReportError(CStrVector("Invalid group") CHECK_FAILED);
3972 break;
3973 }
3974 Advance(2);
3975 } else {
3976 if (captures_ == NULL) {
3977 captures_ = new ZoneList<RegExpCapture*>(2);
3978 }
3979 if (captures_started() >= kMaxCaptures) {
3980 ReportError(CStrVector("Too many captures") CHECK_FAILED);
3981 }
3982 captures_->Add(NULL);
3983 }
3984 // Store current state and begin new disjunction parsing.
3985 stored_state = new RegExpParserState(stored_state,
3986 builder,
3987 disjunction_capture_index,
3988 group_type);
3989 builder = new RegExpBuilder();
3990 group_type = type;
3991 if (type == CAPTURE) {
3992 disjunction_capture_index = captures_started();
3993 }
3898 break; 3994 break;
3899 } 3995 }
3900 case '[': { 3996 case '[': {
3901 RegExpTree* atom = ParseCharacterClass(CHECK_FAILED); 3997 RegExpTree* atom = ParseCharacterClass(CHECK_FAILED);
3902 builder.AddAtom(atom); 3998 builder->AddAtom(atom);
3903 break; 3999 break;
3904 } 4000 }
3905 // Atom :: 4001 // Atom ::
3906 // \ AtomEscape 4002 // \ AtomEscape
3907 case '\\': 4003 case '\\':
3908 switch (Next()) { 4004 switch (Next()) {
3909 case kEndMarker: 4005 case kEndMarker:
3910 ReportError(CStrVector("\\ at end of pattern") CHECK_FAILED); 4006 ReportError(CStrVector("\\ at end of pattern") CHECK_FAILED);
3911 case 'b': 4007 case 'b':
3912 Advance(2); 4008 Advance(2);
3913 builder.AddAssertion( 4009 builder->AddAssertion(
3914 new RegExpAssertion(RegExpAssertion::BOUNDARY)); 4010 new RegExpAssertion(RegExpAssertion::BOUNDARY));
3915 continue; 4011 continue;
3916 case 'B': 4012 case 'B':
3917 Advance(2); 4013 Advance(2);
3918 builder.AddAssertion( 4014 builder->AddAssertion(
3919 new RegExpAssertion(RegExpAssertion::NON_BOUNDARY)); 4015 new RegExpAssertion(RegExpAssertion::NON_BOUNDARY));
3920 continue; 4016 continue;
3921 // AtomEscape :: 4017 // AtomEscape ::
3922 // CharacterClassEscape 4018 // CharacterClassEscape
3923 // 4019 //
3924 // CharacterClassEscape :: one of 4020 // CharacterClassEscape :: one of
3925 // d D s S w W 4021 // d D s S w W
3926 case 'd': case 'D': case 's': case 'S': case 'w': case 'W': { 4022 case 'd': case 'D': case 's': case 'S': case 'w': case 'W': {
3927 uc32 c = Next(); 4023 uc32 c = Next();
3928 Advance(2); 4024 Advance(2);
3929 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2); 4025 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2);
3930 CharacterRange::AddClassEscape(c, ranges); 4026 CharacterRange::AddClassEscape(c, ranges);
3931 RegExpTree* atom = new RegExpCharacterClass(ranges, false); 4027 RegExpTree* atom = new RegExpCharacterClass(ranges, false);
3932 builder.AddAtom(atom); 4028 builder->AddAtom(atom);
3933 goto has_read_atom; // Avoid setting has_character_escapes_. 4029 goto has_read_atom; // Avoid setting has_character_escapes_.
3934 } 4030 }
3935 case '1': case '2': case '3': case '4': case '5': case '6': 4031 case '1': case '2': case '3': case '4': case '5': case '6':
3936 case '7': case '8': case '9': { 4032 case '7': case '8': case '9': {
3937 int index = 0; 4033 int index = 0;
3938 if (ParseBackReferenceIndex(&index)) { 4034 if (ParseBackReferenceIndex(&index)) {
3939 if (!CaptureAvailable(index - 1)) { 4035 RegExpCapture* capture = NULL;
3940 // Prepare to ignore a following quantifier 4036 if (captures_ != NULL && index <= captures_->length()) {
3941 builder.AddEmpty(); 4037 capture = captures_->at(index - 1);
4038 }
4039 if (capture == NULL) {
4040 builder->AddEmpty();
3942 goto has_read_atom; 4041 goto has_read_atom;
3943 } 4042 }
3944 RegExpCapture* capture = captures_->at(index - 1);
3945 RegExpTree* atom = new RegExpBackReference(capture); 4043 RegExpTree* atom = new RegExpBackReference(capture);
3946 builder.AddAtom(atom); 4044 builder->AddAtom(atom);
3947 goto has_read_atom; // Avoid setting has_character_escapes_. 4045 goto has_read_atom; // Avoid setting has_character_escapes_.
3948 } 4046 }
3949 uc32 first_digit = Next(); 4047 uc32 first_digit = Next();
3950 if (first_digit == '8' || first_digit == '9') { 4048 if (first_digit == '8' || first_digit == '9') {
3951 // Treat as identity escape 4049 // Treat as identity escape
3952 builder.AddCharacter(first_digit); 4050 builder->AddCharacter(first_digit);
3953 Advance(2); 4051 Advance(2);
3954 break; 4052 break;
3955 } 4053 }
3956 } 4054 }
3957 // FALLTHROUGH 4055 // FALLTHROUGH
3958 case '0': { 4056 case '0': {
3959 Advance(); 4057 Advance();
3960 uc32 octal = ParseOctalLiteral(); 4058 uc32 octal = ParseOctalLiteral();
3961 builder.AddCharacter(octal); 4059 builder->AddCharacter(octal);
3962 break; 4060 break;
3963 } 4061 }
3964 // ControlEscape :: one of 4062 // ControlEscape :: one of
3965 // f n r t v 4063 // f n r t v
3966 case 'f': 4064 case 'f':
3967 Advance(2); 4065 Advance(2);
3968 builder.AddCharacter('\f'); 4066 builder->AddCharacter('\f');
3969 break; 4067 break;
3970 case 'n': 4068 case 'n':
3971 Advance(2); 4069 Advance(2);
3972 builder.AddCharacter('\n'); 4070 builder->AddCharacter('\n');
3973 break; 4071 break;
3974 case 'r': 4072 case 'r':
3975 Advance(2); 4073 Advance(2);
3976 builder.AddCharacter('\r'); 4074 builder->AddCharacter('\r');
3977 break; 4075 break;
3978 case 't': 4076 case 't':
3979 Advance(2); 4077 Advance(2);
3980 builder.AddCharacter('\t'); 4078 builder->AddCharacter('\t');
3981 break; 4079 break;
3982 case 'v': 4080 case 'v':
3983 Advance(2); 4081 Advance(2);
3984 builder.AddCharacter('\v'); 4082 builder->AddCharacter('\v');
3985 break; 4083 break;
3986 case 'c': { 4084 case 'c': {
3987 Advance(2); 4085 Advance(2);
3988 uc32 control = ParseControlLetterEscape(); 4086 uc32 control = ParseControlLetterEscape();
3989 builder.AddCharacter(control); 4087 builder->AddCharacter(control);
3990 break; 4088 break;
3991 } 4089 }
3992 case 'x': { 4090 case 'x': {
3993 Advance(2); 4091 Advance(2);
3994 uc32 value; 4092 uc32 value;
3995 if (ParseHexEscape(2, &value)) { 4093 if (ParseHexEscape(2, &value)) {
3996 builder.AddCharacter(value); 4094 builder->AddCharacter(value);
3997 } else { 4095 } else {
3998 builder.AddCharacter('x'); 4096 builder->AddCharacter('x');
3999 } 4097 }
4000 break; 4098 break;
4001 } 4099 }
4002 case 'u': { 4100 case 'u': {
4003 Advance(2); 4101 Advance(2);
4004 uc32 value; 4102 uc32 value;
4005 if (ParseHexEscape(4, &value)) { 4103 if (ParseHexEscape(4, &value)) {
4006 builder.AddCharacter(value); 4104 builder->AddCharacter(value);
4007 } else { 4105 } else {
4008 builder.AddCharacter('u'); 4106 builder->AddCharacter('u');
4009 } 4107 }
4010 break; 4108 break;
4011 } 4109 }
4012 default: 4110 default:
4013 // Identity escape. 4111 // Identity escape.
4014 builder.AddCharacter(Next()); 4112 builder->AddCharacter(Next());
4015 Advance(2); 4113 Advance(2);
4016 break; 4114 break;
4017 } 4115 }
4018 break; 4116 break;
4019 case '{': { 4117 case '{': {
4020 int dummy; 4118 int dummy;
4021 if (ParseIntervalQuantifier(&dummy, &dummy)) { 4119 if (ParseIntervalQuantifier(&dummy, &dummy)) {
4022 ReportError(CStrVector("Nothing to repeat") CHECK_FAILED); 4120 ReportError(CStrVector("Nothing to repeat") CHECK_FAILED);
4023 } 4121 }
4024 // fallthrough 4122 // fallthrough
4025 } 4123 }
4026 default: 4124 default:
4027 builder.AddCharacter(current()); 4125 builder->AddCharacter(current());
4028 Advance(); 4126 Advance();
4029 break; 4127 break;
4030 } // end switch(current()) 4128 } // end switch(current())
4031 4129
4032 has_read_atom: 4130 has_read_atom:
4033 int min; 4131 int min;
4034 int max; 4132 int max;
4035 switch (current()) { 4133 switch (current()) {
4036 // QuantifierPrefix :: 4134 // QuantifierPrefix ::
4037 // * 4135 // *
(...skipping 26 matching lines...) Expand all
4064 continue; 4162 continue;
4065 } 4163 }
4066 default: 4164 default:
4067 continue; 4165 continue;
4068 } 4166 }
4069 bool is_greedy = true; 4167 bool is_greedy = true;
4070 if (current() == '?') { 4168 if (current() == '?') {
4071 is_greedy = false; 4169 is_greedy = false;
4072 Advance(); 4170 Advance();
4073 } 4171 }
4074 builder.AddQuantifierToAtom(min, max, is_greedy); 4172 builder->AddQuantifierToAtom(min, max, is_greedy);
4075 } 4173 }
4076 } 4174 }
4077 4175
4078 class SourceCharacter { 4176 class SourceCharacter {
4079 public: 4177 public:
4080 static bool Is(uc32 c) { 4178 static bool Is(uc32 c) {
4081 switch (c) { 4179 switch (c) {
4082 // case ']': case '}': 4180 // case ']': case '}':
4083 // In spidermonkey and jsc these are treated as source characters 4181 // In spidermonkey and jsc these are treated as source characters
4084 // so we do too. 4182 // so we do too.
(...skipping 290 matching lines...) Expand 10 before | Expand all | Expand 10 after
4375 // by the ECMAScript specification. 4473 // by the ECMAScript specification.
4376 uc32 result = current(); 4474 uc32 result = current();
4377 Advance(); 4475 Advance();
4378 return result; 4476 return result;
4379 } 4477 }
4380 } 4478 }
4381 return 0; 4479 return 0;
4382 } 4480 }
4383 4481
4384 4482
4385 RegExpTree* RegExpParser::ParseGroup() {
4386 ASSERT_EQ(current(), '(');
4387 char type = '(';
4388 Advance();
4389 if (current() == '?') {
4390 switch (Next()) {
4391 case ':': case '=': case '!':
4392 type = Next();
4393 Advance(2);
4394 break;
4395 default:
4396 ReportError(CStrVector("Invalid group") CHECK_FAILED);
4397 break;
4398 }
4399 } else {
4400 if (captures_ == NULL) {
4401 captures_ = new ZoneList<RegExpCapture*>(2);
4402 }
4403 if (captures_started() >= kMaxCaptures) {
4404 ReportError(CStrVector("Too many captures") CHECK_FAILED);
4405 }
4406 captures_->Add(NULL);
4407 }
4408 int capture_index = captures_started();
4409 RegExpTree* body = ParseDisjunction(CHECK_FAILED);
4410 if (current() != ')') {
4411 ReportError(CStrVector("Unterminated group") CHECK_FAILED);
4412 }
4413 Advance();
4414
4415 int end_capture_index = captures_started();
4416 if (type == '!') {
4417 // Captures inside a negative lookahead are never available outside it.
4418 for (int i = capture_index; i < end_capture_index; i++) {
4419 RegExpCapture* capture = captures_->at(i);
4420 ASSERT(capture != NULL);
4421 capture->set_available(CAPTURE_PERMANENTLY_UNREACHABLE);
4422 }
4423 } else {
4424 // Captures temporarily unavailable because they are in different
4425 // alternatives are all available after the disjunction.
4426 for (int i = capture_index; i < end_capture_index; i++) {
4427 RegExpCapture* capture = captures_->at(i);
4428 ASSERT(capture != NULL);
4429 if (capture->available() == CAPTURE_UNREACHABLE) {
4430 capture->set_available(CAPTURE_AVAILABLE);
4431 }
4432 }
4433 }
4434
4435 if (type == '(') {
4436 RegExpCapture* capture = new RegExpCapture(body, capture_index);
4437 captures_->at(capture_index - 1) = capture;
4438 return capture;
4439 } else if (type == ':') {
4440 return body;
4441 } else {
4442 ASSERT(type == '=' || type == '!');
4443 bool is_positive = (type == '=');
4444 return new RegExpLookahead(body,
4445 is_positive,
4446 end_capture_index - capture_index,
4447 capture_index);
4448 }
4449 }
4450
4451
4452 CharacterRange RegExpParser::ParseClassAtom(uc16* char_class) { 4483 CharacterRange RegExpParser::ParseClassAtom(uc16* char_class) {
4453 ASSERT_EQ(0, *char_class); 4484 ASSERT_EQ(0, *char_class);
4454 uc32 first = current(); 4485 uc32 first = current();
4455 if (first == '\\') { 4486 if (first == '\\') {
4456 switch (Next()) { 4487 switch (Next()) {
4457 case 'w': case 'W': case 'd': case 'D': case 's': case 'S': { 4488 case 'w': case 'W': case 'd': case 'D': case 's': case 'S': {
4458 *char_class = Next(); 4489 *char_class = Next();
4459 Advance(2); 4490 Advance(2);
4460 return CharacterRange::Singleton(0); // Return dummy value. 4491 return CharacterRange::Singleton(0); // Return dummy value.
4461 } 4492 }
(...skipping 184 matching lines...) Expand 10 before | Expand all | Expand 10 after
4646 start_position, 4677 start_position,
4647 is_expression); 4678 is_expression);
4648 return result; 4679 return result;
4649 } 4680 }
4650 4681
4651 4682
4652 #undef NEW 4683 #undef NEW
4653 4684
4654 4685
4655 } } // namespace v8::internal 4686 } } // namespace v8::internal
OLDNEW
« no previous file with comments | « src/ast.h ('k') | test/cctest/test-regexp.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698