Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 343 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 354 } | 354 } |
| 355 return list_; | 355 return list_; |
| 356 } | 356 } |
| 357 | 357 |
| 358 private: | 358 private: |
| 359 ZoneList<T*>* list_; | 359 ZoneList<T*>* list_; |
| 360 T* last_; | 360 T* last_; |
| 361 }; | 361 }; |
| 362 | 362 |
| 363 // Accumulates RegExp atoms and assertions into lists of terms and alternatives. | 363 // Accumulates RegExp atoms and assertions into lists of terms and alternatives. |
| 364 class RegExpBuilder { | 364 class RegExpBuilder: public ZoneObject { |
| 365 public: | 365 public: |
| 366 RegExpBuilder(); | 366 RegExpBuilder(); |
| 367 void AddCharacter(uc16 character); | 367 void AddCharacter(uc16 character); |
| 368 // "Adds" an empty expression. Does nothing except consume a | 368 // "Adds" an empty expression. Does nothing except consume a |
| 369 // following quantifier | 369 // following quantifier |
| 370 void AddEmpty(); | 370 void AddEmpty(); |
| 371 void AddAtom(RegExpTree* tree); | 371 void AddAtom(RegExpTree* tree); |
| 372 void AddAssertion(RegExpTree* tree); | 372 void AddAssertion(RegExpTree* tree); |
| 373 void NewAlternative(); // '|' | 373 void NewAlternative(); // '|' |
| 374 void AddQuantifierToAtom(int min, int max, bool is_greedy); | 374 void AddQuantifierToAtom(int min, int max, bool is_greedy); |
| (...skipping 10 matching lines...) Expand all Loading... | |
| 385 #ifdef DEBUG | 385 #ifdef DEBUG |
| 386 enum {ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM} last_added_; | 386 enum {ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM} last_added_; |
| 387 #define LAST(x) last_added_ = x; | 387 #define LAST(x) last_added_ = x; |
| 388 #else | 388 #else |
| 389 #define LAST(x) | 389 #define LAST(x) |
| 390 #endif | 390 #endif |
| 391 }; | 391 }; |
| 392 | 392 |
| 393 | 393 |
| 394 RegExpBuilder::RegExpBuilder() | 394 RegExpBuilder::RegExpBuilder() |
| 395 : pending_empty_(false), characters_(NULL), terms_(), alternatives_() | 395 : pending_empty_(false), |
| 396 characters_(NULL), | |
| 397 terms_(), | |
| 398 alternatives_() | |
| 396 #ifdef DEBUG | 399 #ifdef DEBUG |
| 397 , last_added_(ADD_NONE) | 400 , last_added_(ADD_NONE) |
| 398 #endif | 401 #endif |
| 399 {} | 402 {} |
| 400 | 403 |
| 401 | 404 |
| 402 void RegExpBuilder::FlushCharacters() { | 405 void RegExpBuilder::FlushCharacters() { |
| 403 pending_empty_ = false; | 406 pending_empty_ = false; |
| 404 if (characters_ != NULL) { | 407 if (characters_ != NULL) { |
| 405 RegExpTree* atom = new RegExpAtom(characters_->ToConstVector()); | 408 RegExpTree* atom = new RegExpAtom(characters_->ToConstVector()); |
| (...skipping 181 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 587 bool simple(); | 590 bool simple(); |
| 588 bool contains_anchor() { return contains_anchor_; } | 591 bool contains_anchor() { return contains_anchor_; } |
| 589 void set_contains_anchor() { contains_anchor_ = true; } | 592 void set_contains_anchor() { contains_anchor_ = true; } |
| 590 int captures_started() { return captures_ == NULL ? 0 : captures_->length(); } | 593 int captures_started() { return captures_ == NULL ? 0 : captures_->length(); } |
| 591 int position() { return next_pos_ - 1; } | 594 int position() { return next_pos_ - 1; } |
| 592 bool failed() { return failed_; } | 595 bool failed() { return failed_; } |
| 593 | 596 |
| 594 static const int kMaxCaptures = 1 << 16; | 597 static const int kMaxCaptures = 1 << 16; |
| 595 static const uc32 kEndMarker = (1 << 21); | 598 static const uc32 kEndMarker = (1 << 21); |
| 596 private: | 599 private: |
| 600 enum SubexpressionType { | |
| 601 INITIAL, | |
| 602 CAPTURE, // All positive values represent captures. | |
| 603 POSITIVE_LOOKAHEAD, | |
| 604 NEGATIVE_LOOKAHEAD, | |
| 605 GROUPING | |
| 606 }; | |
| 607 | |
| 608 struct RegExpParserState : public ZoneObject { | |
| | Review comment (Erik Corry, 2009/07/01 11:29:14): Please make this a real object. |
| 609 RegExpParserState(RegExpParserState* previous_state, | |
| 610 RegExpBuilder* builder, | |
| 611 int disjunction_capture_index, | |
| 612 SubexpressionType group_type) | |
| 613 : previous_state(previous_state), | |
| 614 builder(builder), | |
| 615 disjunction_capture_index(disjunction_capture_index), | |
| 616 group_type(group_type) {} | |
| 617 // Linked list implementation of stack of states. | |
| 618 RegExpParserState* previous_state; | |
| 619 // Builder for the stored disjunction. | |
| 620 RegExpBuilder* builder; | |
| 621 // Stored disjunction's capture index (if any). | |
| 622 int disjunction_capture_index; | |
| 623 // Stored disjunction type (capture, look-ahead or grouping), if any. | |
| 624 SubexpressionType group_type; | |
| 625 }; | |
| 597 | 626 |
| 598 uc32 current() { return current_; } | 627 uc32 current() { return current_; } |
| 599 bool has_more() { return has_more_; } | 628 bool has_more() { return has_more_; } |
| 600 bool has_next() { return next_pos_ < in()->length(); } | 629 bool has_next() { return next_pos_ < in()->length(); } |
| 601 uc32 Next(); | 630 uc32 Next(); |
| 602 FlatStringReader* in() { return in_; } | 631 FlatStringReader* in() { return in_; } |
| 603 void ScanForCaptures(); | 632 void ScanForCaptures(); |
| 604 bool CaptureAvailable(int index); | |
| 605 uc32 current_; | 633 uc32 current_; |
| 606 bool has_more_; | 634 bool has_more_; |
| 607 bool multiline_; | 635 bool multiline_; |
| 608 int next_pos_; | 636 int next_pos_; |
| 609 FlatStringReader* in_; | 637 FlatStringReader* in_; |
| 610 Handle<String>* error_; | 638 Handle<String>* error_; |
| 611 bool simple_; | 639 bool simple_; |
| 612 bool contains_anchor_; | 640 bool contains_anchor_; |
| 613 ZoneList<RegExpCapture*>* captures_; | 641 ZoneList<RegExpCapture*>* captures_; |
| 614 bool is_scanned_for_captures_; | 642 bool is_scanned_for_captures_; |
| (...skipping 3198 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3813 } | 3841 } |
| 3814 // If the result of parsing is a literal string atom, and it has the | 3842 // If the result of parsing is a literal string atom, and it has the |
| 3815 // same length as the input, then the atom is identical to the input. | 3843 // same length as the input, then the atom is identical to the input. |
| 3816 if (result->IsAtom() && result->AsAtom()->length() == in()->length()) { | 3844 if (result->IsAtom() && result->AsAtom()->length() == in()->length()) { |
| 3817 simple_ = true; | 3845 simple_ = true; |
| 3818 } | 3846 } |
| 3819 return result; | 3847 return result; |
| 3820 } | 3848 } |
| 3821 | 3849 |
| 3822 | 3850 |
| 3823 bool RegExpParser::CaptureAvailable(int index) { | |
| 3824 if (captures_ == NULL) return false; | |
| 3825 if (index >= captures_->length()) return false; | |
| 3826 RegExpCapture* capture = captures_->at(index); | |
| 3827 return capture != NULL && capture->available() == CAPTURE_AVAILABLE; | |
| 3828 } | |
| 3829 | |
| 3830 | |
| 3831 // Disjunction :: | 3851 // Disjunction :: |
| 3832 // Alternative | 3852 // Alternative |
| 3833 // Alternative | Disjunction | 3853 // Alternative | Disjunction |
| 3834 // Alternative :: | 3854 // Alternative :: |
| 3835 // [empty] | 3855 // [empty] |
| 3836 // Term Alternative | 3856 // Term Alternative |
| 3837 // Term :: | 3857 // Term :: |
| 3838 // Assertion | 3858 // Assertion |
| 3839 // Atom | 3859 // Atom |
| 3840 // Atom Quantifier | 3860 // Atom Quantifier |
| 3841 RegExpTree* RegExpParser::ParseDisjunction() { | 3861 RegExpTree* RegExpParser::ParseDisjunction() { |
| 3842 RegExpBuilder builder; | 3862 // Used to store current state while parsing subexpressions. |
| 3843 int capture_start_index = captures_started(); | 3863 RegExpParserState* stored_state = NULL; |
| | Review comment (Erik Corry, 2009/07/01 11:29:14): If you initialize this to an initial stored state … [comment truncated] |
| 3864 RegExpBuilder* builder = new RegExpBuilder(); | |
| 3865 SubexpressionType group_type = INITIAL; | |
| 3866 // Index in captures array of first capture in this sub-expression, if any. | |
| 3867 // Also the capture index of this sub-expression itself, if group_type | |
| 3868 // is CAPTURE. | |
| 3869 int disjunction_capture_index = 0; | |
| 3844 while (true) { | 3870 while (true) { |
| 3845 switch (current()) { | 3871 switch (current()) { |
| 3846 case kEndMarker: | 3872 case kEndMarker: |
| 3847 case ')': | 3873 if (stored_state != NULL) { |
| 3848 return builder.ToRegExp(); | 3874 // Inside a parenthesized group when hitting end of input. |
| 3875 ReportError(CStrVector("Unterminated group") CHECK_FAILED); | |
| 3876 } | |
| 3877 ASSERT_EQ(INITIAL, group_type); | |
| 3878 // Parsing completed successfully. | |
| 3879 return builder->ToRegExp(); | |
| 3880 case ')': { | |
| 3881 if (stored_state == NULL) { | |
| 3882 ReportError(CStrVector("Unexpected ')'") CHECK_FAILED); | |
| 3883 } | |
| 3884 ASSERT_NE(INITIAL, group_type); | |
| 3885 | |
| 3886 Advance(); | |
| 3887 // End disjunction parsing and convert builder content to new single | |
| 3888 // regexp atom. | |
| 3889 RegExpTree* body = builder->ToRegExp(); | |
| 3890 | |
| 3891 int end_capture_index = captures_started(); | |
| 3892 | |
| 3893 int capture_index = disjunction_capture_index; | |
| 3894 SubexpressionType type = group_type; | |
| 3895 | |
| 3896 // Restore previous state. | |
| 3897 builder = stored_state->builder; | |
| 3898 group_type = stored_state->group_type; | |
| 3899 disjunction_capture_index = stored_state->disjunction_capture_index; | |
| 3900 stored_state = stored_state->previous_state; | |
| 3901 | |
| 3902 // Build result of subexpression. | |
| 3903 if (type == CAPTURE) { | |
| 3904 RegExpCapture* capture = new RegExpCapture(body, capture_index); | |
| 3905 captures_->at(capture_index - 1) = capture; | |
| 3906 body = capture; | |
| 3907 } else if (type != GROUPING) { | |
| 3908 ASSERT(type == POSITIVE_LOOKAHEAD || type == NEGATIVE_LOOKAHEAD); | |
| 3909 bool is_positive = (type == POSITIVE_LOOKAHEAD); | |
| 3910 body = new RegExpLookahead(body, | |
| 3911 is_positive, | |
| 3912 end_capture_index - capture_index, | |
| 3913 capture_index); | |
| 3914 } | |
| 3915 builder->AddAtom(body); | |
| 3916 break; | |
| 3917 } | |
| 3849 case '|': { | 3918 case '|': { |
| 3850 Advance(); | 3919 Advance(); |
| 3851 builder.NewAlternative(); | 3920 builder->NewAlternative(); |
| 3852 int capture_new_alt_start_index = captures_started(); | |
| 3853 for (int i = capture_start_index; i < capture_new_alt_start_index; i++) { | |
| 3854 RegExpCapture* capture = captures_->at(i); | |
| 3855 if (capture->available() == CAPTURE_AVAILABLE) { | |
| 3856 capture->set_available(CAPTURE_UNREACHABLE); | |
| 3857 } | |
| 3858 } | |
| 3859 capture_start_index = capture_new_alt_start_index; | |
| 3860 continue; | 3921 continue; |
| 3861 } | 3922 } |
| 3862 case '*': | 3923 case '*': |
| 3863 case '+': | 3924 case '+': |
| 3864 case '?': | 3925 case '?': |
| 3865 ReportError(CStrVector("Nothing to repeat") CHECK_FAILED); | 3926 ReportError(CStrVector("Nothing to repeat") CHECK_FAILED); |
| 3866 case '^': { | 3927 case '^': { |
| 3867 Advance(); | 3928 Advance(); |
| 3868 if (multiline_) { | 3929 if (multiline_) { |
| 3869 builder.AddAssertion( | 3930 builder->AddAssertion( |
| 3870 new RegExpAssertion(RegExpAssertion::START_OF_LINE)); | 3931 new RegExpAssertion(RegExpAssertion::START_OF_LINE)); |
| 3871 } else { | 3932 } else { |
| 3872 builder.AddAssertion( | 3933 builder->AddAssertion( |
| 3873 new RegExpAssertion(RegExpAssertion::START_OF_INPUT)); | 3934 new RegExpAssertion(RegExpAssertion::START_OF_INPUT)); |
| 3874 set_contains_anchor(); | 3935 set_contains_anchor(); |
| 3875 } | 3936 } |
| 3876 continue; | 3937 continue; |
| 3877 } | 3938 } |
| 3878 case '$': { | 3939 case '$': { |
| 3879 Advance(); | 3940 Advance(); |
| 3880 RegExpAssertion::Type type = | 3941 RegExpAssertion::Type type = |
| 3881 multiline_ ? RegExpAssertion::END_OF_LINE : | 3942 multiline_ ? RegExpAssertion::END_OF_LINE : |
| 3882 RegExpAssertion::END_OF_INPUT; | 3943 RegExpAssertion::END_OF_INPUT; |
| 3883 builder.AddAssertion(new RegExpAssertion(type)); | 3944 builder->AddAssertion(new RegExpAssertion(type)); |
| 3884 continue; | 3945 continue; |
| 3885 } | 3946 } |
| 3886 case '.': { | 3947 case '.': { |
| 3887 Advance(); | 3948 Advance(); |
| 3888 // everything except \x0a, \x0d, \u2028 and \u2029 | 3949 // everything except \x0a, \x0d, \u2028 and \u2029 |
| 3889 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2); | 3950 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2); |
| 3890 CharacterRange::AddClassEscape('.', ranges); | 3951 CharacterRange::AddClassEscape('.', ranges); |
| 3891 RegExpTree* atom = new RegExpCharacterClass(ranges, false); | 3952 RegExpTree* atom = new RegExpCharacterClass(ranges, false); |
| 3892 builder.AddAtom(atom); | 3953 builder->AddAtom(atom); |
| 3893 break; | 3954 break; |
| 3894 } | 3955 } |
| 3895 case '(': { | 3956 case '(': { |
| 3896 RegExpTree* atom = ParseGroup(CHECK_FAILED); | 3957 SubexpressionType type = CAPTURE; |
| 3897 builder.AddAtom(atom); | 3958 Advance(); |
| 3959 if (current() == '?') { | |
| 3960 switch (Next()) { | |
| 3961 case ':': | |
| 3962 type = GROUPING; | |
| 3963 break; | |
| 3964 case '=': | |
| 3965 type = POSITIVE_LOOKAHEAD; | |
| 3966 break; | |
| 3967 case '!': | |
| 3968 type = NEGATIVE_LOOKAHEAD; | |
| 3969 break; | |
| 3970 default: | |
| 3971 ReportError(CStrVector("Invalid group") CHECK_FAILED); | |
| 3972 break; | |
| 3973 } | |
| 3974 Advance(2); | |
| 3975 } else { | |
| 3976 if (captures_ == NULL) { | |
| 3977 captures_ = new ZoneList<RegExpCapture*>(2); | |
| 3978 } | |
| 3979 if (captures_started() >= kMaxCaptures) { | |
| 3980 ReportError(CStrVector("Too many captures") CHECK_FAILED); | |
| 3981 } | |
| 3982 captures_->Add(NULL); | |
| 3983 } | |
| 3984 // Store current state and begin new disjunction parsing. | |
| 3985 stored_state = new RegExpParserState(stored_state, | |
| 3986 builder, | |
| 3987 disjunction_capture_index, | |
| 3988 group_type); | |
| 3989 builder = new RegExpBuilder(); | |
| 3990 group_type = type; | |
| 3991 if (type == CAPTURE) { | |
| 3992 disjunction_capture_index = captures_started(); | |
| 3993 } | |
| 3898 break; | 3994 break; |
| 3899 } | 3995 } |
| 3900 case '[': { | 3996 case '[': { |
| 3901 RegExpTree* atom = ParseCharacterClass(CHECK_FAILED); | 3997 RegExpTree* atom = ParseCharacterClass(CHECK_FAILED); |
| 3902 builder.AddAtom(atom); | 3998 builder->AddAtom(atom); |
| 3903 break; | 3999 break; |
| 3904 } | 4000 } |
| 3905 // Atom :: | 4001 // Atom :: |
| 3906 // \ AtomEscape | 4002 // \ AtomEscape |
| 3907 case '\\': | 4003 case '\\': |
| 3908 switch (Next()) { | 4004 switch (Next()) { |
| 3909 case kEndMarker: | 4005 case kEndMarker: |
| 3910 ReportError(CStrVector("\\ at end of pattern") CHECK_FAILED); | 4006 ReportError(CStrVector("\\ at end of pattern") CHECK_FAILED); |
| 3911 case 'b': | 4007 case 'b': |
| 3912 Advance(2); | 4008 Advance(2); |
| 3913 builder.AddAssertion( | 4009 builder->AddAssertion( |
| 3914 new RegExpAssertion(RegExpAssertion::BOUNDARY)); | 4010 new RegExpAssertion(RegExpAssertion::BOUNDARY)); |
| 3915 continue; | 4011 continue; |
| 3916 case 'B': | 4012 case 'B': |
| 3917 Advance(2); | 4013 Advance(2); |
| 3918 builder.AddAssertion( | 4014 builder->AddAssertion( |
| 3919 new RegExpAssertion(RegExpAssertion::NON_BOUNDARY)); | 4015 new RegExpAssertion(RegExpAssertion::NON_BOUNDARY)); |
| 3920 continue; | 4016 continue; |
| 3921 // AtomEscape :: | 4017 // AtomEscape :: |
| 3922 // CharacterClassEscape | 4018 // CharacterClassEscape |
| 3923 // | 4019 // |
| 3924 // CharacterClassEscape :: one of | 4020 // CharacterClassEscape :: one of |
| 3925 // d D s S w W | 4021 // d D s S w W |
| 3926 case 'd': case 'D': case 's': case 'S': case 'w': case 'W': { | 4022 case 'd': case 'D': case 's': case 'S': case 'w': case 'W': { |
| 3927 uc32 c = Next(); | 4023 uc32 c = Next(); |
| 3928 Advance(2); | 4024 Advance(2); |
| 3929 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2); | 4025 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2); |
| 3930 CharacterRange::AddClassEscape(c, ranges); | 4026 CharacterRange::AddClassEscape(c, ranges); |
| 3931 RegExpTree* atom = new RegExpCharacterClass(ranges, false); | 4027 RegExpTree* atom = new RegExpCharacterClass(ranges, false); |
| 3932 builder.AddAtom(atom); | 4028 builder->AddAtom(atom); |
| 3933 goto has_read_atom; // Avoid setting has_character_escapes_. | 4029 goto has_read_atom; // Avoid setting has_character_escapes_. |
| 3934 } | 4030 } |
| 3935 case '1': case '2': case '3': case '4': case '5': case '6': | 4031 case '1': case '2': case '3': case '4': case '5': case '6': |
| 3936 case '7': case '8': case '9': { | 4032 case '7': case '8': case '9': { |
| 3937 int index = 0; | 4033 int index = 0; |
| 3938 if (ParseBackReferenceIndex(&index)) { | 4034 if (ParseBackReferenceIndex(&index)) { |
| 3939 if (!CaptureAvailable(index - 1)) { | 4035 RegExpCapture* capture = NULL; |
| 3940 // Prepare to ignore a following quantifier | 4036 if (captures_ != NULL && index <= captures_->length()) { |
| 3941 builder.AddEmpty(); | 4037 capture = captures_->at(index - 1); |
| 4038 } | |
| 4039 if (capture == NULL) { | |
| 4040 builder->AddEmpty(); | |
| 3942 goto has_read_atom; | 4041 goto has_read_atom; |
| 3943 } | 4042 } |
| 3944 RegExpCapture* capture = captures_->at(index - 1); | |
| 3945 RegExpTree* atom = new RegExpBackReference(capture); | 4043 RegExpTree* atom = new RegExpBackReference(capture); |
| 3946 builder.AddAtom(atom); | 4044 builder->AddAtom(atom); |
| 3947 goto has_read_atom; // Avoid setting has_character_escapes_. | 4045 goto has_read_atom; // Avoid setting has_character_escapes_. |
| 3948 } | 4046 } |
| 3949 uc32 first_digit = Next(); | 4047 uc32 first_digit = Next(); |
| 3950 if (first_digit == '8' || first_digit == '9') { | 4048 if (first_digit == '8' || first_digit == '9') { |
| 3951 // Treat as identity escape | 4049 // Treat as identity escape |
| 3952 builder.AddCharacter(first_digit); | 4050 builder->AddCharacter(first_digit); |
| 3953 Advance(2); | 4051 Advance(2); |
| 3954 break; | 4052 break; |
| 3955 } | 4053 } |
| 3956 } | 4054 } |
| 3957 // FALLTHROUGH | 4055 // FALLTHROUGH |
| 3958 case '0': { | 4056 case '0': { |
| 3959 Advance(); | 4057 Advance(); |
| 3960 uc32 octal = ParseOctalLiteral(); | 4058 uc32 octal = ParseOctalLiteral(); |
| 3961 builder.AddCharacter(octal); | 4059 builder->AddCharacter(octal); |
| 3962 break; | 4060 break; |
| 3963 } | 4061 } |
| 3964 // ControlEscape :: one of | 4062 // ControlEscape :: one of |
| 3965 // f n r t v | 4063 // f n r t v |
| 3966 case 'f': | 4064 case 'f': |
| 3967 Advance(2); | 4065 Advance(2); |
| 3968 builder.AddCharacter('\f'); | 4066 builder->AddCharacter('\f'); |
| 3969 break; | 4067 break; |
| 3970 case 'n': | 4068 case 'n': |
| 3971 Advance(2); | 4069 Advance(2); |
| 3972 builder.AddCharacter('\n'); | 4070 builder->AddCharacter('\n'); |
| 3973 break; | 4071 break; |
| 3974 case 'r': | 4072 case 'r': |
| 3975 Advance(2); | 4073 Advance(2); |
| 3976 builder.AddCharacter('\r'); | 4074 builder->AddCharacter('\r'); |
| 3977 break; | 4075 break; |
| 3978 case 't': | 4076 case 't': |
| 3979 Advance(2); | 4077 Advance(2); |
| 3980 builder.AddCharacter('\t'); | 4078 builder->AddCharacter('\t'); |
| 3981 break; | 4079 break; |
| 3982 case 'v': | 4080 case 'v': |
| 3983 Advance(2); | 4081 Advance(2); |
| 3984 builder.AddCharacter('\v'); | 4082 builder->AddCharacter('\v'); |
| 3985 break; | 4083 break; |
| 3986 case 'c': { | 4084 case 'c': { |
| 3987 Advance(2); | 4085 Advance(2); |
| 3988 uc32 control = ParseControlLetterEscape(); | 4086 uc32 control = ParseControlLetterEscape(); |
| 3989 builder.AddCharacter(control); | 4087 builder->AddCharacter(control); |
| 3990 break; | 4088 break; |
| 3991 } | 4089 } |
| 3992 case 'x': { | 4090 case 'x': { |
| 3993 Advance(2); | 4091 Advance(2); |
| 3994 uc32 value; | 4092 uc32 value; |
| 3995 if (ParseHexEscape(2, &value)) { | 4093 if (ParseHexEscape(2, &value)) { |
| 3996 builder.AddCharacter(value); | 4094 builder->AddCharacter(value); |
| 3997 } else { | 4095 } else { |
| 3998 builder.AddCharacter('x'); | 4096 builder->AddCharacter('x'); |
| 3999 } | 4097 } |
| 4000 break; | 4098 break; |
| 4001 } | 4099 } |
| 4002 case 'u': { | 4100 case 'u': { |
| 4003 Advance(2); | 4101 Advance(2); |
| 4004 uc32 value; | 4102 uc32 value; |
| 4005 if (ParseHexEscape(4, &value)) { | 4103 if (ParseHexEscape(4, &value)) { |
| 4006 builder.AddCharacter(value); | 4104 builder->AddCharacter(value); |
| 4007 } else { | 4105 } else { |
| 4008 builder.AddCharacter('u'); | 4106 builder->AddCharacter('u'); |
| 4009 } | 4107 } |
| 4010 break; | 4108 break; |
| 4011 } | 4109 } |
| 4012 default: | 4110 default: |
| 4013 // Identity escape. | 4111 // Identity escape. |
| 4014 builder.AddCharacter(Next()); | 4112 builder->AddCharacter(Next()); |
| 4015 Advance(2); | 4113 Advance(2); |
| 4016 break; | 4114 break; |
| 4017 } | 4115 } |
| 4018 break; | 4116 break; |
| 4019 case '{': { | 4117 case '{': { |
| 4020 int dummy; | 4118 int dummy; |
| 4021 if (ParseIntervalQuantifier(&dummy, &dummy)) { | 4119 if (ParseIntervalQuantifier(&dummy, &dummy)) { |
| 4022 ReportError(CStrVector("Nothing to repeat") CHECK_FAILED); | 4120 ReportError(CStrVector("Nothing to repeat") CHECK_FAILED); |
| 4023 } | 4121 } |
| 4024 // fallthrough | 4122 // fallthrough |
| 4025 } | 4123 } |
| 4026 default: | 4124 default: |
| 4027 builder.AddCharacter(current()); | 4125 builder->AddCharacter(current()); |
| 4028 Advance(); | 4126 Advance(); |
| 4029 break; | 4127 break; |
| 4030 } // end switch(current()) | 4128 } // end switch(current()) |
| 4031 | 4129 |
| 4032 has_read_atom: | 4130 has_read_atom: |
| 4033 int min; | 4131 int min; |
| 4034 int max; | 4132 int max; |
| 4035 switch (current()) { | 4133 switch (current()) { |
| 4036 // QuantifierPrefix :: | 4134 // QuantifierPrefix :: |
| 4037 // * | 4135 // * |
| (...skipping 26 matching lines...) Expand all Loading... | |
| 4064 continue; | 4162 continue; |
| 4065 } | 4163 } |
| 4066 default: | 4164 default: |
| 4067 continue; | 4165 continue; |
| 4068 } | 4166 } |
| 4069 bool is_greedy = true; | 4167 bool is_greedy = true; |
| 4070 if (current() == '?') { | 4168 if (current() == '?') { |
| 4071 is_greedy = false; | 4169 is_greedy = false; |
| 4072 Advance(); | 4170 Advance(); |
| 4073 } | 4171 } |
| 4074 builder.AddQuantifierToAtom(min, max, is_greedy); | 4172 builder->AddQuantifierToAtom(min, max, is_greedy); |
| 4075 } | 4173 } |
| 4076 } | 4174 } |
| 4077 | 4175 |
| 4078 class SourceCharacter { | 4176 class SourceCharacter { |
| 4079 public: | 4177 public: |
| 4080 static bool Is(uc32 c) { | 4178 static bool Is(uc32 c) { |
| 4081 switch (c) { | 4179 switch (c) { |
| 4082 // case ']': case '}': | 4180 // case ']': case '}': |
| 4083 // In spidermonkey and jsc these are treated as source characters | 4181 // In spidermonkey and jsc these are treated as source characters |
| 4084 // so we do too. | 4182 // so we do too. |
| (...skipping 290 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 4375 // by the ECMAScript specification. | 4473 // by the ECMAScript specification. |
| 4376 uc32 result = current(); | 4474 uc32 result = current(); |
| 4377 Advance(); | 4475 Advance(); |
| 4378 return result; | 4476 return result; |
| 4379 } | 4477 } |
| 4380 } | 4478 } |
| 4381 return 0; | 4479 return 0; |
| 4382 } | 4480 } |
| 4383 | 4481 |
| 4384 | 4482 |
| 4385 RegExpTree* RegExpParser::ParseGroup() { | |
| 4386 ASSERT_EQ(current(), '('); | |
| 4387 char type = '('; | |
| 4388 Advance(); | |
| 4389 if (current() == '?') { | |
| 4390 switch (Next()) { | |
| 4391 case ':': case '=': case '!': | |
| 4392 type = Next(); | |
| 4393 Advance(2); | |
| 4394 break; | |
| 4395 default: | |
| 4396 ReportError(CStrVector("Invalid group") CHECK_FAILED); | |
| 4397 break; | |
| 4398 } | |
| 4399 } else { | |
| 4400 if (captures_ == NULL) { | |
| 4401 captures_ = new ZoneList<RegExpCapture*>(2); | |
| 4402 } | |
| 4403 if (captures_started() >= kMaxCaptures) { | |
| 4404 ReportError(CStrVector("Too many captures") CHECK_FAILED); | |
| 4405 } | |
| 4406 captures_->Add(NULL); | |
| 4407 } | |
| 4408 int capture_index = captures_started(); | |
| 4409 RegExpTree* body = ParseDisjunction(CHECK_FAILED); | |
| 4410 if (current() != ')') { | |
| 4411 ReportError(CStrVector("Unterminated group") CHECK_FAILED); | |
| 4412 } | |
| 4413 Advance(); | |
| 4414 | |
| 4415 int end_capture_index = captures_started(); | |
| 4416 if (type == '!') { | |
| 4417 // Captures inside a negative lookahead are never available outside it. | |
| 4418 for (int i = capture_index; i < end_capture_index; i++) { | |
| 4419 RegExpCapture* capture = captures_->at(i); | |
| 4420 ASSERT(capture != NULL); | |
| 4421 capture->set_available(CAPTURE_PERMANENTLY_UNREACHABLE); | |
| 4422 } | |
| 4423 } else { | |
| 4424 // Captures temporarily unavailable because they are in different | |
| 4425 // alternatives are all available after the disjunction. | |
| 4426 for (int i = capture_index; i < end_capture_index; i++) { | |
| 4427 RegExpCapture* capture = captures_->at(i); | |
| 4428 ASSERT(capture != NULL); | |
| 4429 if (capture->available() == CAPTURE_UNREACHABLE) { | |
| 4430 capture->set_available(CAPTURE_AVAILABLE); | |
| 4431 } | |
| 4432 } | |
| 4433 } | |
| 4434 | |
| 4435 if (type == '(') { | |
| 4436 RegExpCapture* capture = new RegExpCapture(body, capture_index); | |
| 4437 captures_->at(capture_index - 1) = capture; | |
| 4438 return capture; | |
| 4439 } else if (type == ':') { | |
| 4440 return body; | |
| 4441 } else { | |
| 4442 ASSERT(type == '=' || type == '!'); | |
| 4443 bool is_positive = (type == '='); | |
| 4444 return new RegExpLookahead(body, | |
| 4445 is_positive, | |
| 4446 end_capture_index - capture_index, | |
| 4447 capture_index); | |
| 4448 } | |
| 4449 } | |
| 4450 | |
| 4451 | |
| 4452 CharacterRange RegExpParser::ParseClassAtom(uc16* char_class) { | 4483 CharacterRange RegExpParser::ParseClassAtom(uc16* char_class) { |
| 4453 ASSERT_EQ(0, *char_class); | 4484 ASSERT_EQ(0, *char_class); |
| 4454 uc32 first = current(); | 4485 uc32 first = current(); |
| 4455 if (first == '\\') { | 4486 if (first == '\\') { |
| 4456 switch (Next()) { | 4487 switch (Next()) { |
| 4457 case 'w': case 'W': case 'd': case 'D': case 's': case 'S': { | 4488 case 'w': case 'W': case 'd': case 'D': case 's': case 'S': { |
| 4458 *char_class = Next(); | 4489 *char_class = Next(); |
| 4459 Advance(2); | 4490 Advance(2); |
| 4460 return CharacterRange::Singleton(0); // Return dummy value. | 4491 return CharacterRange::Singleton(0); // Return dummy value. |
| 4461 } | 4492 } |
| (...skipping 184 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 4646 start_position, | 4677 start_position, |
| 4647 is_expression); | 4678 is_expression); |
| 4648 return result; | 4679 return result; |
| 4649 } | 4680 } |
| 4650 | 4681 |
| 4651 | 4682 |
| 4652 #undef NEW | 4683 #undef NEW |
| 4653 | 4684 |
| 4654 | 4685 |
| 4655 } } // namespace v8::internal | 4686 } } // namespace v8::internal |
| OLD | NEW |