OLD | NEW |
---|---|
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 343 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
354 } | 354 } |
355 return list_; | 355 return list_; |
356 } | 356 } |
357 | 357 |
358 private: | 358 private: |
359 ZoneList<T*>* list_; | 359 ZoneList<T*>* list_; |
360 T* last_; | 360 T* last_; |
361 }; | 361 }; |
362 | 362 |
363 // Accumulates RegExp atoms and assertions into lists of terms and alternatives. | 363 // Accumulates RegExp atoms and assertions into lists of terms and alternatives. |
364 class RegExpBuilder { | 364 class RegExpBuilder: public ZoneObject { |
365 public: | 365 public: |
366 RegExpBuilder(); | 366 RegExpBuilder(); |
367 void AddCharacter(uc16 character); | 367 void AddCharacter(uc16 character); |
368 // "Adds" an empty expression. Does nothing except consume a | 368 // "Adds" an empty expression. Does nothing except consume a |
369 // following quantifier | 369 // following quantifier |
370 void AddEmpty(); | 370 void AddEmpty(); |
371 void AddAtom(RegExpTree* tree); | 371 void AddAtom(RegExpTree* tree); |
372 void AddAssertion(RegExpTree* tree); | 372 void AddAssertion(RegExpTree* tree); |
373 void NewAlternative(); // '|' | 373 void NewAlternative(); // '|' |
374 void AddQuantifierToAtom(int min, int max, bool is_greedy); | 374 void AddQuantifierToAtom(int min, int max, bool is_greedy); |
(...skipping 10 matching lines...) Expand all Loading... | |
385 #ifdef DEBUG | 385 #ifdef DEBUG |
386 enum {ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM} last_added_; | 386 enum {ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM} last_added_; |
387 #define LAST(x) last_added_ = x; | 387 #define LAST(x) last_added_ = x; |
388 #else | 388 #else |
389 #define LAST(x) | 389 #define LAST(x) |
390 #endif | 390 #endif |
391 }; | 391 }; |
392 | 392 |
393 | 393 |
394 RegExpBuilder::RegExpBuilder() | 394 RegExpBuilder::RegExpBuilder() |
395 : pending_empty_(false), characters_(NULL), terms_(), alternatives_() | 395 : pending_empty_(false), |
396 characters_(NULL), | |
397 terms_(), | |
398 alternatives_() | |
396 #ifdef DEBUG | 399 #ifdef DEBUG |
397 , last_added_(ADD_NONE) | 400 , last_added_(ADD_NONE) |
398 #endif | 401 #endif |
399 {} | 402 {} |
400 | 403 |
401 | 404 |
402 void RegExpBuilder::FlushCharacters() { | 405 void RegExpBuilder::FlushCharacters() { |
403 pending_empty_ = false; | 406 pending_empty_ = false; |
404 if (characters_ != NULL) { | 407 if (characters_ != NULL) { |
405 RegExpTree* atom = new RegExpAtom(characters_->ToConstVector()); | 408 RegExpTree* atom = new RegExpAtom(characters_->ToConstVector()); |
(...skipping 181 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
587 bool simple(); | 590 bool simple(); |
588 bool contains_anchor() { return contains_anchor_; } | 591 bool contains_anchor() { return contains_anchor_; } |
589 void set_contains_anchor() { contains_anchor_ = true; } | 592 void set_contains_anchor() { contains_anchor_ = true; } |
590 int captures_started() { return captures_ == NULL ? 0 : captures_->length(); } | 593 int captures_started() { return captures_ == NULL ? 0 : captures_->length(); } |
591 int position() { return next_pos_ - 1; } | 594 int position() { return next_pos_ - 1; } |
592 bool failed() { return failed_; } | 595 bool failed() { return failed_; } |
593 | 596 |
594 static const int kMaxCaptures = 1 << 16; | 597 static const int kMaxCaptures = 1 << 16; |
595 static const uc32 kEndMarker = (1 << 21); | 598 static const uc32 kEndMarker = (1 << 21); |
596 private: | 599 private: |
600 enum SubexpressionType { | |
601 INITIAL, | |
602 CAPTURE, // All positive values represent captures. | |
603 POSITIVE_LOOKAHEAD, | |
604 NEGATIVE_LOOKAHEAD, | |
605 GROUPING | |
606 }; | |
607 | |
608 struct RegExpParserState : public ZoneObject { | |
Erik Corry
2009/07/01 11:29:14
Please make this a real object.
| |
609 RegExpParserState(RegExpParserState* previous_state, | |
610 RegExpBuilder* builder, | |
611 int disjunction_capture_index, | |
612 SubexpressionType group_type) | |
613 : previous_state(previous_state), | |
614 builder(builder), | |
615 disjunction_capture_index(disjunction_capture_index), | |
616 group_type(group_type) {} | |
617 // Linked list implementation of stack of states. | |
618 RegExpParserState* previous_state; | |
619 // Builder for the stored disjunction. | |
620 RegExpBuilder* builder; | |
621 // Stored disjunction's capture index (if any). | |
622 int disjunction_capture_index; | |
623 // Stored disjunction type (capture, look-ahead or grouping), if any. | |
624 SubexpressionType group_type; | |
625 }; | |
597 | 626 |
598 uc32 current() { return current_; } | 627 uc32 current() { return current_; } |
599 bool has_more() { return has_more_; } | 628 bool has_more() { return has_more_; } |
600 bool has_next() { return next_pos_ < in()->length(); } | 629 bool has_next() { return next_pos_ < in()->length(); } |
601 uc32 Next(); | 630 uc32 Next(); |
602 FlatStringReader* in() { return in_; } | 631 FlatStringReader* in() { return in_; } |
603 void ScanForCaptures(); | 632 void ScanForCaptures(); |
604 bool CaptureAvailable(int index); | |
605 uc32 current_; | 633 uc32 current_; |
606 bool has_more_; | 634 bool has_more_; |
607 bool multiline_; | 635 bool multiline_; |
608 int next_pos_; | 636 int next_pos_; |
609 FlatStringReader* in_; | 637 FlatStringReader* in_; |
610 Handle<String>* error_; | 638 Handle<String>* error_; |
611 bool simple_; | 639 bool simple_; |
612 bool contains_anchor_; | 640 bool contains_anchor_; |
613 ZoneList<RegExpCapture*>* captures_; | 641 ZoneList<RegExpCapture*>* captures_; |
614 bool is_scanned_for_captures_; | 642 bool is_scanned_for_captures_; |
(...skipping 3198 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
3813 } | 3841 } |
3814 // If the result of parsing is a literal string atom, and it has the | 3842 // If the result of parsing is a literal string atom, and it has the |
3815 // same length as the input, then the atom is identical to the input. | 3843 // same length as the input, then the atom is identical to the input. |
3816 if (result->IsAtom() && result->AsAtom()->length() == in()->length()) { | 3844 if (result->IsAtom() && result->AsAtom()->length() == in()->length()) { |
3817 simple_ = true; | 3845 simple_ = true; |
3818 } | 3846 } |
3819 return result; | 3847 return result; |
3820 } | 3848 } |
3821 | 3849 |
3822 | 3850 |
3823 bool RegExpParser::CaptureAvailable(int index) { | |
3824 if (captures_ == NULL) return false; | |
3825 if (index >= captures_->length()) return false; | |
3826 RegExpCapture* capture = captures_->at(index); | |
3827 return capture != NULL && capture->available() == CAPTURE_AVAILABLE; | |
3828 } | |
3829 | |
3830 | |
3831 // Disjunction :: | 3851 // Disjunction :: |
3832 // Alternative | 3852 // Alternative |
3833 // Alternative | Disjunction | 3853 // Alternative | Disjunction |
3834 // Alternative :: | 3854 // Alternative :: |
3835 // [empty] | 3855 // [empty] |
3836 // Term Alternative | 3856 // Term Alternative |
3837 // Term :: | 3857 // Term :: |
3838 // Assertion | 3858 // Assertion |
3839 // Atom | 3859 // Atom |
3840 // Atom Quantifier | 3860 // Atom Quantifier |
3841 RegExpTree* RegExpParser::ParseDisjunction() { | 3861 RegExpTree* RegExpParser::ParseDisjunction() { |
3842 RegExpBuilder builder; | 3862 // Used to store current state while parsing subexpressions. |
3843 int capture_start_index = captures_started(); | 3863 RegExpParserState* stored_state = NULL; |
Erik Corry
2009/07/01 11:29:14
If you initialize this to an initial stored state
| |
3864 RegExpBuilder* builder = new RegExpBuilder(); | |
3865 SubexpressionType group_type = INITIAL; | |
3866 // Index in captures array of first capture in this sub-expression, if any. | |
3867 // Also the capture index of this sub-expression itself, if group_type | |
3868 // is CAPTURE. | |
3869 int disjunction_capture_index = 0; | |
3844 while (true) { | 3870 while (true) { |
3845 switch (current()) { | 3871 switch (current()) { |
3846 case kEndMarker: | 3872 case kEndMarker: |
3847 case ')': | 3873 if (stored_state != NULL) { |
3848 return builder.ToRegExp(); | 3874 // Inside a parenthesized group when hitting end of input. |
3875 ReportError(CStrVector("Unterminated group") CHECK_FAILED); | |
3876 } | |
3877 ASSERT_EQ(INITIAL, group_type); | |
3878 // Parsing completed successfully. | |
3879 return builder->ToRegExp(); | |
3880 case ')': { | |
3881 if (stored_state == NULL) { | |
3882 ReportError(CStrVector("Unexpected ')'") CHECK_FAILED); | |
3883 } | |
3884 ASSERT_NE(INITIAL, group_type); | |
3885 | |
3886 Advance(); | |
3887 // End disjunction parsing and convert builder content to new single | |
3888 // regexp atom. | |
3889 RegExpTree* body = builder->ToRegExp(); | |
3890 | |
3891 int end_capture_index = captures_started(); | |
3892 | |
3893 int capture_index = disjunction_capture_index; | |
3894 SubexpressionType type = group_type; | |
3895 | |
3896 // Restore previous state. | |
3897 builder = stored_state->builder; | |
3898 group_type = stored_state->group_type; | |
3899 disjunction_capture_index = stored_state->disjunction_capture_index; | |
3900 stored_state = stored_state->previous_state; | |
3901 | |
3902 // Build result of subexpression. | |
3903 if (type == CAPTURE) { | |
3904 RegExpCapture* capture = new RegExpCapture(body, capture_index); | |
3905 captures_->at(capture_index - 1) = capture; | |
3906 body = capture; | |
3907 } else if (type != GROUPING) { | |
3908 ASSERT(type == POSITIVE_LOOKAHEAD || type == NEGATIVE_LOOKAHEAD); | |
3909 bool is_positive = (type == POSITIVE_LOOKAHEAD); | |
3910 body = new RegExpLookahead(body, | |
3911 is_positive, | |
3912 end_capture_index - capture_index, | |
3913 capture_index); | |
3914 } | |
3915 builder->AddAtom(body); | |
3916 break; | |
3917 } | |
3849 case '|': { | 3918 case '|': { |
3850 Advance(); | 3919 Advance(); |
3851 builder.NewAlternative(); | 3920 builder->NewAlternative(); |
3852 int capture_new_alt_start_index = captures_started(); | |
3853 for (int i = capture_start_index; i < capture_new_alt_start_index; i++) { | |
3854 RegExpCapture* capture = captures_->at(i); | |
3855 if (capture->available() == CAPTURE_AVAILABLE) { | |
3856 capture->set_available(CAPTURE_UNREACHABLE); | |
3857 } | |
3858 } | |
3859 capture_start_index = capture_new_alt_start_index; | |
3860 continue; | 3921 continue; |
3861 } | 3922 } |
3862 case '*': | 3923 case '*': |
3863 case '+': | 3924 case '+': |
3864 case '?': | 3925 case '?': |
3865 ReportError(CStrVector("Nothing to repeat") CHECK_FAILED); | 3926 ReportError(CStrVector("Nothing to repeat") CHECK_FAILED); |
3866 case '^': { | 3927 case '^': { |
3867 Advance(); | 3928 Advance(); |
3868 if (multiline_) { | 3929 if (multiline_) { |
3869 builder.AddAssertion( | 3930 builder->AddAssertion( |
3870 new RegExpAssertion(RegExpAssertion::START_OF_LINE)); | 3931 new RegExpAssertion(RegExpAssertion::START_OF_LINE)); |
3871 } else { | 3932 } else { |
3872 builder.AddAssertion( | 3933 builder->AddAssertion( |
3873 new RegExpAssertion(RegExpAssertion::START_OF_INPUT)); | 3934 new RegExpAssertion(RegExpAssertion::START_OF_INPUT)); |
3874 set_contains_anchor(); | 3935 set_contains_anchor(); |
3875 } | 3936 } |
3876 continue; | 3937 continue; |
3877 } | 3938 } |
3878 case '$': { | 3939 case '$': { |
3879 Advance(); | 3940 Advance(); |
3880 RegExpAssertion::Type type = | 3941 RegExpAssertion::Type type = |
3881 multiline_ ? RegExpAssertion::END_OF_LINE : | 3942 multiline_ ? RegExpAssertion::END_OF_LINE : |
3882 RegExpAssertion::END_OF_INPUT; | 3943 RegExpAssertion::END_OF_INPUT; |
3883 builder.AddAssertion(new RegExpAssertion(type)); | 3944 builder->AddAssertion(new RegExpAssertion(type)); |
3884 continue; | 3945 continue; |
3885 } | 3946 } |
3886 case '.': { | 3947 case '.': { |
3887 Advance(); | 3948 Advance(); |
3888 // everything except \x0a, \x0d, \u2028 and \u2029 | 3949 // everything except \x0a, \x0d, \u2028 and \u2029 |
3889 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2); | 3950 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2); |
3890 CharacterRange::AddClassEscape('.', ranges); | 3951 CharacterRange::AddClassEscape('.', ranges); |
3891 RegExpTree* atom = new RegExpCharacterClass(ranges, false); | 3952 RegExpTree* atom = new RegExpCharacterClass(ranges, false); |
3892 builder.AddAtom(atom); | 3953 builder->AddAtom(atom); |
3893 break; | 3954 break; |
3894 } | 3955 } |
3895 case '(': { | 3956 case '(': { |
3896 RegExpTree* atom = ParseGroup(CHECK_FAILED); | 3957 SubexpressionType type = CAPTURE; |
3897 builder.AddAtom(atom); | 3958 Advance(); |
3959 if (current() == '?') { | |
3960 switch (Next()) { | |
3961 case ':': | |
3962 type = GROUPING; | |
3963 break; | |
3964 case '=': | |
3965 type = POSITIVE_LOOKAHEAD; | |
3966 break; | |
3967 case '!': | |
3968 type = NEGATIVE_LOOKAHEAD; | |
3969 break; | |
3970 default: | |
3971 ReportError(CStrVector("Invalid group") CHECK_FAILED); | |
3972 break; | |
3973 } | |
3974 Advance(2); | |
3975 } else { | |
3976 if (captures_ == NULL) { | |
3977 captures_ = new ZoneList<RegExpCapture*>(2); | |
3978 } | |
3979 if (captures_started() >= kMaxCaptures) { | |
3980 ReportError(CStrVector("Too many captures") CHECK_FAILED); | |
3981 } | |
3982 captures_->Add(NULL); | |
3983 } | |
3984 // Store current state and begin new disjunction parsing. | |
3985 stored_state = new RegExpParserState(stored_state, | |
3986 builder, | |
3987 disjunction_capture_index, | |
3988 group_type); | |
3989 builder = new RegExpBuilder(); | |
3990 group_type = type; | |
3991 if (type == CAPTURE) { | |
3992 disjunction_capture_index = captures_started(); | |
3993 } | |
3898 break; | 3994 break; |
3899 } | 3995 } |
3900 case '[': { | 3996 case '[': { |
3901 RegExpTree* atom = ParseCharacterClass(CHECK_FAILED); | 3997 RegExpTree* atom = ParseCharacterClass(CHECK_FAILED); |
3902 builder.AddAtom(atom); | 3998 builder->AddAtom(atom); |
3903 break; | 3999 break; |
3904 } | 4000 } |
3905 // Atom :: | 4001 // Atom :: |
3906 // \ AtomEscape | 4002 // \ AtomEscape |
3907 case '\\': | 4003 case '\\': |
3908 switch (Next()) { | 4004 switch (Next()) { |
3909 case kEndMarker: | 4005 case kEndMarker: |
3910 ReportError(CStrVector("\\ at end of pattern") CHECK_FAILED); | 4006 ReportError(CStrVector("\\ at end of pattern") CHECK_FAILED); |
3911 case 'b': | 4007 case 'b': |
3912 Advance(2); | 4008 Advance(2); |
3913 builder.AddAssertion( | 4009 builder->AddAssertion( |
3914 new RegExpAssertion(RegExpAssertion::BOUNDARY)); | 4010 new RegExpAssertion(RegExpAssertion::BOUNDARY)); |
3915 continue; | 4011 continue; |
3916 case 'B': | 4012 case 'B': |
3917 Advance(2); | 4013 Advance(2); |
3918 builder.AddAssertion( | 4014 builder->AddAssertion( |
3919 new RegExpAssertion(RegExpAssertion::NON_BOUNDARY)); | 4015 new RegExpAssertion(RegExpAssertion::NON_BOUNDARY)); |
3920 continue; | 4016 continue; |
3921 // AtomEscape :: | 4017 // AtomEscape :: |
3922 // CharacterClassEscape | 4018 // CharacterClassEscape |
3923 // | 4019 // |
3924 // CharacterClassEscape :: one of | 4020 // CharacterClassEscape :: one of |
3925 // d D s S w W | 4021 // d D s S w W |
3926 case 'd': case 'D': case 's': case 'S': case 'w': case 'W': { | 4022 case 'd': case 'D': case 's': case 'S': case 'w': case 'W': { |
3927 uc32 c = Next(); | 4023 uc32 c = Next(); |
3928 Advance(2); | 4024 Advance(2); |
3929 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2); | 4025 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2); |
3930 CharacterRange::AddClassEscape(c, ranges); | 4026 CharacterRange::AddClassEscape(c, ranges); |
3931 RegExpTree* atom = new RegExpCharacterClass(ranges, false); | 4027 RegExpTree* atom = new RegExpCharacterClass(ranges, false); |
3932 builder.AddAtom(atom); | 4028 builder->AddAtom(atom); |
3933 goto has_read_atom; // Avoid setting has_character_escapes_. | 4029 goto has_read_atom; // Avoid setting has_character_escapes_. |
3934 } | 4030 } |
3935 case '1': case '2': case '3': case '4': case '5': case '6': | 4031 case '1': case '2': case '3': case '4': case '5': case '6': |
3936 case '7': case '8': case '9': { | 4032 case '7': case '8': case '9': { |
3937 int index = 0; | 4033 int index = 0; |
3938 if (ParseBackReferenceIndex(&index)) { | 4034 if (ParseBackReferenceIndex(&index)) { |
3939 if (!CaptureAvailable(index - 1)) { | 4035 RegExpCapture* capture = NULL; |
3940 // Prepare to ignore a following quantifier | 4036 if (captures_ != NULL && index <= captures_->length()) { |
3941 builder.AddEmpty(); | 4037 capture = captures_->at(index - 1); |
4038 } | |
4039 if (capture == NULL) { | |
4040 builder->AddEmpty(); | |
3942 goto has_read_atom; | 4041 goto has_read_atom; |
3943 } | 4042 } |
3944 RegExpCapture* capture = captures_->at(index - 1); | |
3945 RegExpTree* atom = new RegExpBackReference(capture); | 4043 RegExpTree* atom = new RegExpBackReference(capture); |
3946 builder.AddAtom(atom); | 4044 builder->AddAtom(atom); |
3947 goto has_read_atom; // Avoid setting has_character_escapes_. | 4045 goto has_read_atom; // Avoid setting has_character_escapes_. |
3948 } | 4046 } |
3949 uc32 first_digit = Next(); | 4047 uc32 first_digit = Next(); |
3950 if (first_digit == '8' || first_digit == '9') { | 4048 if (first_digit == '8' || first_digit == '9') { |
3951 // Treat as identity escape | 4049 // Treat as identity escape |
3952 builder.AddCharacter(first_digit); | 4050 builder->AddCharacter(first_digit); |
3953 Advance(2); | 4051 Advance(2); |
3954 break; | 4052 break; |
3955 } | 4053 } |
3956 } | 4054 } |
3957 // FALLTHROUGH | 4055 // FALLTHROUGH |
3958 case '0': { | 4056 case '0': { |
3959 Advance(); | 4057 Advance(); |
3960 uc32 octal = ParseOctalLiteral(); | 4058 uc32 octal = ParseOctalLiteral(); |
3961 builder.AddCharacter(octal); | 4059 builder->AddCharacter(octal); |
3962 break; | 4060 break; |
3963 } | 4061 } |
3964 // ControlEscape :: one of | 4062 // ControlEscape :: one of |
3965 // f n r t v | 4063 // f n r t v |
3966 case 'f': | 4064 case 'f': |
3967 Advance(2); | 4065 Advance(2); |
3968 builder.AddCharacter('\f'); | 4066 builder->AddCharacter('\f'); |
3969 break; | 4067 break; |
3970 case 'n': | 4068 case 'n': |
3971 Advance(2); | 4069 Advance(2); |
3972 builder.AddCharacter('\n'); | 4070 builder->AddCharacter('\n'); |
3973 break; | 4071 break; |
3974 case 'r': | 4072 case 'r': |
3975 Advance(2); | 4073 Advance(2); |
3976 builder.AddCharacter('\r'); | 4074 builder->AddCharacter('\r'); |
3977 break; | 4075 break; |
3978 case 't': | 4076 case 't': |
3979 Advance(2); | 4077 Advance(2); |
3980 builder.AddCharacter('\t'); | 4078 builder->AddCharacter('\t'); |
3981 break; | 4079 break; |
3982 case 'v': | 4080 case 'v': |
3983 Advance(2); | 4081 Advance(2); |
3984 builder.AddCharacter('\v'); | 4082 builder->AddCharacter('\v'); |
3985 break; | 4083 break; |
3986 case 'c': { | 4084 case 'c': { |
3987 Advance(2); | 4085 Advance(2); |
3988 uc32 control = ParseControlLetterEscape(); | 4086 uc32 control = ParseControlLetterEscape(); |
3989 builder.AddCharacter(control); | 4087 builder->AddCharacter(control); |
3990 break; | 4088 break; |
3991 } | 4089 } |
3992 case 'x': { | 4090 case 'x': { |
3993 Advance(2); | 4091 Advance(2); |
3994 uc32 value; | 4092 uc32 value; |
3995 if (ParseHexEscape(2, &value)) { | 4093 if (ParseHexEscape(2, &value)) { |
3996 builder.AddCharacter(value); | 4094 builder->AddCharacter(value); |
3997 } else { | 4095 } else { |
3998 builder.AddCharacter('x'); | 4096 builder->AddCharacter('x'); |
3999 } | 4097 } |
4000 break; | 4098 break; |
4001 } | 4099 } |
4002 case 'u': { | 4100 case 'u': { |
4003 Advance(2); | 4101 Advance(2); |
4004 uc32 value; | 4102 uc32 value; |
4005 if (ParseHexEscape(4, &value)) { | 4103 if (ParseHexEscape(4, &value)) { |
4006 builder.AddCharacter(value); | 4104 builder->AddCharacter(value); |
4007 } else { | 4105 } else { |
4008 builder.AddCharacter('u'); | 4106 builder->AddCharacter('u'); |
4009 } | 4107 } |
4010 break; | 4108 break; |
4011 } | 4109 } |
4012 default: | 4110 default: |
4013 // Identity escape. | 4111 // Identity escape. |
4014 builder.AddCharacter(Next()); | 4112 builder->AddCharacter(Next()); |
4015 Advance(2); | 4113 Advance(2); |
4016 break; | 4114 break; |
4017 } | 4115 } |
4018 break; | 4116 break; |
4019 case '{': { | 4117 case '{': { |
4020 int dummy; | 4118 int dummy; |
4021 if (ParseIntervalQuantifier(&dummy, &dummy)) { | 4119 if (ParseIntervalQuantifier(&dummy, &dummy)) { |
4022 ReportError(CStrVector("Nothing to repeat") CHECK_FAILED); | 4120 ReportError(CStrVector("Nothing to repeat") CHECK_FAILED); |
4023 } | 4121 } |
4024 // fallthrough | 4122 // fallthrough |
4025 } | 4123 } |
4026 default: | 4124 default: |
4027 builder.AddCharacter(current()); | 4125 builder->AddCharacter(current()); |
4028 Advance(); | 4126 Advance(); |
4029 break; | 4127 break; |
4030 } // end switch(current()) | 4128 } // end switch(current()) |
4031 | 4129 |
4032 has_read_atom: | 4130 has_read_atom: |
4033 int min; | 4131 int min; |
4034 int max; | 4132 int max; |
4035 switch (current()) { | 4133 switch (current()) { |
4036 // QuantifierPrefix :: | 4134 // QuantifierPrefix :: |
4037 // * | 4135 // * |
(...skipping 26 matching lines...) Expand all Loading... | |
4064 continue; | 4162 continue; |
4065 } | 4163 } |
4066 default: | 4164 default: |
4067 continue; | 4165 continue; |
4068 } | 4166 } |
4069 bool is_greedy = true; | 4167 bool is_greedy = true; |
4070 if (current() == '?') { | 4168 if (current() == '?') { |
4071 is_greedy = false; | 4169 is_greedy = false; |
4072 Advance(); | 4170 Advance(); |
4073 } | 4171 } |
4074 builder.AddQuantifierToAtom(min, max, is_greedy); | 4172 builder->AddQuantifierToAtom(min, max, is_greedy); |
4075 } | 4173 } |
4076 } | 4174 } |
4077 | 4175 |
4078 class SourceCharacter { | 4176 class SourceCharacter { |
4079 public: | 4177 public: |
4080 static bool Is(uc32 c) { | 4178 static bool Is(uc32 c) { |
4081 switch (c) { | 4179 switch (c) { |
4082 // case ']': case '}': | 4180 // case ']': case '}': |
4083 // In spidermonkey and jsc these are treated as source characters | 4181 // In spidermonkey and jsc these are treated as source characters |
4084 // so we do too. | 4182 // so we do too. |
(...skipping 290 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
4375 // by the ECMAScript specification. | 4473 // by the ECMAScript specification. |
4376 uc32 result = current(); | 4474 uc32 result = current(); |
4377 Advance(); | 4475 Advance(); |
4378 return result; | 4476 return result; |
4379 } | 4477 } |
4380 } | 4478 } |
4381 return 0; | 4479 return 0; |
4382 } | 4480 } |
4383 | 4481 |
4384 | 4482 |
4385 RegExpTree* RegExpParser::ParseGroup() { | |
4386 ASSERT_EQ(current(), '('); | |
4387 char type = '('; | |
4388 Advance(); | |
4389 if (current() == '?') { | |
4390 switch (Next()) { | |
4391 case ':': case '=': case '!': | |
4392 type = Next(); | |
4393 Advance(2); | |
4394 break; | |
4395 default: | |
4396 ReportError(CStrVector("Invalid group") CHECK_FAILED); | |
4397 break; | |
4398 } | |
4399 } else { | |
4400 if (captures_ == NULL) { | |
4401 captures_ = new ZoneList<RegExpCapture*>(2); | |
4402 } | |
4403 if (captures_started() >= kMaxCaptures) { | |
4404 ReportError(CStrVector("Too many captures") CHECK_FAILED); | |
4405 } | |
4406 captures_->Add(NULL); | |
4407 } | |
4408 int capture_index = captures_started(); | |
4409 RegExpTree* body = ParseDisjunction(CHECK_FAILED); | |
4410 if (current() != ')') { | |
4411 ReportError(CStrVector("Unterminated group") CHECK_FAILED); | |
4412 } | |
4413 Advance(); | |
4414 | |
4415 int end_capture_index = captures_started(); | |
4416 if (type == '!') { | |
4417 // Captures inside a negative lookahead are never available outside it. | |
4418 for (int i = capture_index; i < end_capture_index; i++) { | |
4419 RegExpCapture* capture = captures_->at(i); | |
4420 ASSERT(capture != NULL); | |
4421 capture->set_available(CAPTURE_PERMANENTLY_UNREACHABLE); | |
4422 } | |
4423 } else { | |
4424 // Captures temporarily unavailable because they are in different | |
4425 // alternatives are all available after the disjunction. | |
4426 for (int i = capture_index; i < end_capture_index; i++) { | |
4427 RegExpCapture* capture = captures_->at(i); | |
4428 ASSERT(capture != NULL); | |
4429 if (capture->available() == CAPTURE_UNREACHABLE) { | |
4430 capture->set_available(CAPTURE_AVAILABLE); | |
4431 } | |
4432 } | |
4433 } | |
4434 | |
4435 if (type == '(') { | |
4436 RegExpCapture* capture = new RegExpCapture(body, capture_index); | |
4437 captures_->at(capture_index - 1) = capture; | |
4438 return capture; | |
4439 } else if (type == ':') { | |
4440 return body; | |
4441 } else { | |
4442 ASSERT(type == '=' || type == '!'); | |
4443 bool is_positive = (type == '='); | |
4444 return new RegExpLookahead(body, | |
4445 is_positive, | |
4446 end_capture_index - capture_index, | |
4447 capture_index); | |
4448 } | |
4449 } | |
4450 | |
4451 | |
4452 CharacterRange RegExpParser::ParseClassAtom(uc16* char_class) { | 4483 CharacterRange RegExpParser::ParseClassAtom(uc16* char_class) { |
4453 ASSERT_EQ(0, *char_class); | 4484 ASSERT_EQ(0, *char_class); |
4454 uc32 first = current(); | 4485 uc32 first = current(); |
4455 if (first == '\\') { | 4486 if (first == '\\') { |
4456 switch (Next()) { | 4487 switch (Next()) { |
4457 case 'w': case 'W': case 'd': case 'D': case 's': case 'S': { | 4488 case 'w': case 'W': case 'd': case 'D': case 's': case 'S': { |
4458 *char_class = Next(); | 4489 *char_class = Next(); |
4459 Advance(2); | 4490 Advance(2); |
4460 return CharacterRange::Singleton(0); // Return dummy value. | 4491 return CharacterRange::Singleton(0); // Return dummy value. |
4461 } | 4492 } |
(...skipping 184 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
4646 start_position, | 4677 start_position, |
4647 is_expression); | 4678 is_expression); |
4648 return result; | 4679 return result; |
4649 } | 4680 } |
4650 | 4681 |
4651 | 4682 |
4652 #undef NEW | 4683 #undef NEW |
4653 | 4684 |
4654 | 4685 |
4655 } } // namespace v8::internal | 4686 } } // namespace v8::internal |
OLD | NEW |