| OLD | NEW |
| 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 244 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 255 | 255 |
| 256 uc32 ParseControlEscape(bool* ok); | 256 uc32 ParseControlEscape(bool* ok); |
| 257 uc32 ParseOctalLiteral(bool* ok); | 257 uc32 ParseOctalLiteral(bool* ok); |
| 258 | 258 |
| 259 // Tries to parse the input as a backreference. If successful it | 259 // Tries to parse the input as a backreference. If successful it |
| 260 // stores the result in the output parameter and returns true. If | 260 // stores the result in the output parameter and returns true. If |
| 261 // it fails it will push back the characters read so the same characters | 261 // it fails it will push back the characters read so the same characters |
| 262 // can be reparsed. | 262 // can be reparsed. |
| 263 bool ParseBackreferenceIndex(int* index_out); | 263 bool ParseBackreferenceIndex(int* index_out); |
| 264 | 264 |
| 265 CharacterRange ParseClassAtom(bool* ok); | 265 CharacterRange ParseClassAtom(bool* is_char_class, |
| 266 ZoneList<CharacterRange>* ranges, |
| 267 bool* ok); |
| 266 RegExpTree* ReportError(Vector<const char> message, bool* ok); | 268 RegExpTree* ReportError(Vector<const char> message, bool* ok); |
| 267 void Advance(); | 269 void Advance(); |
| 268 void Advance(int dist); | 270 void Advance(int dist); |
| 269 // Pushes a read character (or potentially some other character) back | 271 // Pushes a read character (or potentially some other character) back |
| 270 // on the input stream. After pushing it back, it becomes the character | 272 // on the input stream. After pushing it back, it becomes the character |
| 271 // returned by current(). There is a limited amount of push-back buffer. | 273 // returned by current(). There is a limited amount of push-back buffer. |
| 272 // A function using PushBack should check that it doesn't push back more | 274 // A function using PushBack should check that it doesn't push back more |
| 273 // than kMaxPushback characters, and it should not push back more characters | 275 // than kMaxPushback characters, and it should not push back more characters |
| 274 // than it has read, or that it knows had been read prior to calling it. | 276 // than it has read, or that it knows had been read prior to calling it. |
| 275 void PushBack(uc32 character); | 277 void PushBack(uc32 character); |
| (...skipping 3174 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3450 case '^': | 3452 case '^': |
| 3451 Advance(); | 3453 Advance(); |
| 3452 return new RegExpAssertion( | 3454 return new RegExpAssertion( |
| 3453 multiline_mode_ ? RegExpAssertion::START_OF_LINE | 3455 multiline_mode_ ? RegExpAssertion::START_OF_LINE |
| 3454 : RegExpAssertion::START_OF_INPUT); | 3456 : RegExpAssertion::START_OF_INPUT); |
| 3455 case '$': | 3457 case '$': |
| 3456 Advance(); | 3458 Advance(); |
| 3457 return new RegExpAssertion( | 3459 return new RegExpAssertion( |
| 3458 multiline_mode_ ? RegExpAssertion::END_OF_LINE | 3460 multiline_mode_ ? RegExpAssertion::END_OF_LINE |
| 3459 : RegExpAssertion::END_OF_INPUT); | 3461 : RegExpAssertion::END_OF_INPUT); |
| 3460 case '.': | 3462 case '.': { |
| 3461 Advance(); | 3463 Advance(); |
| 3462 atom = new RegExpCharacterClass(CharacterRange::CharacterClass('.')); | 3464 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2); |
| 3465 CharacterRange::AddClassEscape('.', ranges); |
| 3466 atom = new RegExpCharacterClass(ranges, false); |
| 3463 break; | 3467 break; |
| 3468 } |
| 3464 case '(': | 3469 case '(': |
| 3465 atom = ParseGroup(CHECK_OK); | 3470 atom = ParseGroup(CHECK_OK); |
| 3466 break; | 3471 break; |
| 3467 case '[': | 3472 case '[': |
| 3468 atom = ParseCharacterClass(CHECK_OK); | 3473 atom = ParseCharacterClass(CHECK_OK); |
| 3469 break; | 3474 break; |
| 3470 // Atom :: | 3475 // Atom :: |
| 3471 // \ AtomEscape | 3476 // \ AtomEscape |
| 3472 case '\\': | 3477 case '\\': |
| 3473 if (has_next()) { | 3478 if (has_next()) { |
| 3474 switch (next()) { | 3479 switch (next()) { |
| 3475 case 'b': | 3480 case 'b': |
| 3476 Advance(2); | 3481 Advance(2); |
| 3477 return new RegExpAssertion(RegExpAssertion::BOUNDARY); | 3482 return new RegExpAssertion(RegExpAssertion::BOUNDARY); |
| 3478 case 'B': | 3483 case 'B': |
| 3479 Advance(2); | 3484 Advance(2); |
| 3480 return new RegExpAssertion(RegExpAssertion::NON_BOUNDARY); | 3485 return new RegExpAssertion(RegExpAssertion::NON_BOUNDARY); |
| 3481 // AtomEscape :: | 3486 // AtomEscape :: |
| 3482 // CharacterClassEscape | 3487 // CharacterClassEscape |
| 3483 // | 3488 // |
| 3484 // CharacterClassEscape :: one of | 3489 // CharacterClassEscape :: one of |
| 3485 // d D s S w W | 3490 // d D s S w W |
| 3486 case 'd': case 'D': case 's': case 'S': case 'w': case 'W': { | 3491 case 'd': case 'D': case 's': case 'S': case 'w': case 'W': { |
| 3487 uc32 c = next(); | 3492 uc32 c = next(); |
| 3493 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2); |
| 3494 CharacterRange::AddClassEscape(c, ranges); |
| 3488 Advance(2); | 3495 Advance(2); |
| 3489 atom = new RegExpCharacterClass(CharacterRange::CharacterClass(c)); | 3496 atom = new RegExpCharacterClass(ranges, false); |
| 3490 goto has_read_atom; | 3497 goto has_read_atom; |
| 3491 } | 3498 } |
| 3492 case '1': case '2': case '3': case '4': case '5': case '6': | 3499 case '1': case '2': case '3': case '4': case '5': case '6': |
| 3493 case '7': case '8': case '9': { | 3500 case '7': case '8': case '9': { |
| 3494 int index = 0; | 3501 int index = 0; |
| 3495 if (ParseBackreferenceIndex(&index)) { | 3502 if (ParseBackreferenceIndex(&index)) { |
| 3496 atom = new RegExpBackreference(index); | 3503 atom = new RegExpBackreference(index); |
| 3497 goto has_read_atom; | 3504 goto has_read_atom; |
| 3498 } else { | 3505 } else { |
| 3499 // If this is not a backreference we go to the atom parser | 3506 // If this is not a backreference we go to the atom parser |
| (...skipping 287 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3787 } else if (type == ':') { | 3794 } else if (type == ':') { |
| 3788 return body; | 3795 return body; |
| 3789 } else { | 3796 } else { |
| 3790 ASSERT(type == '=' || type == '!'); | 3797 ASSERT(type == '=' || type == '!'); |
| 3791 bool is_positive = (type == '='); | 3798 bool is_positive = (type == '='); |
| 3792 return new RegExpLookahead(body, is_positive); | 3799 return new RegExpLookahead(body, is_positive); |
| 3793 } | 3800 } |
| 3794 } | 3801 } |
| 3795 | 3802 |
| 3796 | 3803 |
| 3797 CharacterRange RegExpParser::ParseClassAtom(bool* ok) { | 3804 CharacterRange RegExpParser::ParseClassAtom(bool* is_char_class, |
| 3805 ZoneList<CharacterRange>* ranges, |
| 3806 bool* ok) { |
| 3807 ASSERT_EQ(false, *is_char_class); |
| 3798 uc32 first = current(); | 3808 uc32 first = current(); |
| 3799 if (first == '\\') { | 3809 if (first == '\\') { |
| 3800 switch (next()) { | 3810 switch (next()) { |
| 3801 case 'b': | 3811 case 'b': |
| 3802 Advance(2); | 3812 Advance(2); |
| 3803 return CharacterRange::Singleton('\b'); | 3813 return CharacterRange::Singleton('\b'); |
| 3804 case 'w': case 'W': case 'd': case 'D': case 's': case 'S': { | 3814 case 'w': case 'W': case 'd': case 'D': case 's': case 'S': { |
| 3815 *is_char_class = true; |
| 3805 uc32 c = next(); | 3816 uc32 c = next(); |
| 3817 CharacterRange::AddClassEscape(c, ranges); |
| 3806 Advance(2); | 3818 Advance(2); |
| 3807 return CharacterRange::CharacterClass(c); | 3819 return NULL; |
| 3808 } | 3820 } |
| 3809 default: | 3821 default: |
| 3810 uc32 c = ParseCharacterEscape(CHECK_OK); | 3822 uc32 c = ParseCharacterEscape(CHECK_OK); |
| 3811 return CharacterRange::Singleton(c); | 3823 return CharacterRange::Singleton(c); |
| 3812 } | 3824 } |
| 3813 } else { | 3825 } else { |
| 3814 Advance(); | 3826 Advance(); |
| 3815 return CharacterRange::Singleton(first); | 3827 return CharacterRange::Singleton(first); |
| 3816 } | 3828 } |
| 3817 } | 3829 } |
| (...skipping 10 matching lines...) Expand all Loading... |
| 3828 if (current() == '^') { | 3840 if (current() == '^') { |
| 3829 is_negated = true; | 3841 is_negated = true; |
| 3830 Advance(); | 3842 Advance(); |
| 3831 } | 3843 } |
| 3832 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2); | 3844 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2); |
| 3833 while (has_more() && current() != ']') { | 3845 while (has_more() && current() != ']') { |
| 3834 if (current() == '-') { | 3846 if (current() == '-') { |
| 3835 Advance(); | 3847 Advance(); |
| 3836 ranges->Add(CharacterRange::Singleton('-')); | 3848 ranges->Add(CharacterRange::Singleton('-')); |
| 3837 } else { | 3849 } else { |
| 3838 CharacterRange first = ParseClassAtom(CHECK_OK); | 3850 bool is_char_class = false; |
| 3839 if (!first.is_character_class() && current() == '-') { | 3851 CharacterRange first = ParseClassAtom(&is_char_class, ranges, CHECK_OK); |
| 3840 Advance(); | 3852 if (!is_char_class) { |
| 3841 CharacterRange next = ParseClassAtom(CHECK_OK); | 3853 if (current() == '-') { |
| 3842 if (next.is_character_class()) { | 3854 Advance(); |
| 3843 return ReportError(CStrVector(kIllegal), CHECK_OK); | 3855 CharacterRange next = ParseClassAtom(&is_char_class, ranges, CHECK_OK)
; |
| 3856 if (is_char_class) { |
| 3857 return ReportError(CStrVector(kIllegal), CHECK_OK); |
| 3858 } |
| 3859 if (first.from() > next.to()) { |
| 3860 return ReportError(CStrVector(kRangeOutOfOrder), CHECK_OK); |
| 3861 } |
| 3862 ranges->Add(CharacterRange::Range(first.from(), next.to())); |
| 3863 } else { |
| 3864 ranges->Add(first); |
| 3844 } | 3865 } |
| 3845 if (first.from() > next.to()) { | |
| 3846 return ReportError(CStrVector(kRangeOutOfOrder), CHECK_OK); | |
| 3847 } | |
| 3848 ranges->Add(CharacterRange::Range(first.from(), next.to())); | |
| 3849 } else { | |
| 3850 ranges->Add(first); | |
| 3851 } | 3866 } |
| 3852 } | 3867 } |
| 3853 } | 3868 } |
| 3854 if (!has_more()) { | 3869 if (!has_more()) { |
| 3855 return ReportError(CStrVector(kUnterminated), CHECK_OK); | 3870 return ReportError(CStrVector(kUnterminated), CHECK_OK); |
| 3856 } | 3871 } |
| 3857 Advance(); | 3872 Advance(); |
| 3858 if (ranges->length() == 0) { | 3873 if (ranges->length() == 0) { |
| 3859 return RegExpEmpty::GetInstance(); | 3874 return RegExpEmpty::GetInstance(); |
| 3860 } else { | 3875 } else { |
| (...skipping 112 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3973 start_position, | 3988 start_position, |
| 3974 is_expression); | 3989 is_expression); |
| 3975 return result; | 3990 return result; |
| 3976 } | 3991 } |
| 3977 | 3992 |
| 3978 | 3993 |
| 3979 #undef NEW | 3994 #undef NEW |
| 3980 | 3995 |
| 3981 | 3996 |
| 3982 } } // namespace v8::internal | 3997 } } // namespace v8::internal |
| OLD | NEW |