Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(666)

Side by Side Diff: src/parser.cc

Issue 9104: Dispatch tables (Closed)
Patch Set: Created 12 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. 1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 244 matching lines...) Expand 10 before | Expand all | Expand 10 after
255 255
256 uc32 ParseControlEscape(bool* ok); 256 uc32 ParseControlEscape(bool* ok);
257 uc32 ParseOctalLiteral(bool* ok); 257 uc32 ParseOctalLiteral(bool* ok);
258 258
259 // Tries to parse the input as a backreference. If successful it 259 // Tries to parse the input as a backreference. If successful it
260 // stores the result in the output parameter and returns true. If 260 // stores the result in the output parameter and returns true. If
261 // it fails it will push back the characters read so the same characters 261 // it fails it will push back the characters read so the same characters
262 // can be reparsed. 262 // can be reparsed.
263 bool ParseBackreferenceIndex(int* index_out); 263 bool ParseBackreferenceIndex(int* index_out);
264 264
265 CharacterRange ParseClassAtom(bool* ok); 265 CharacterRange ParseClassAtom(bool* is_char_class,
266 ZoneList<CharacterRange>* ranges,
267 bool* ok);
266 RegExpTree* ReportError(Vector<const char> message, bool* ok); 268 RegExpTree* ReportError(Vector<const char> message, bool* ok);
267 void Advance(); 269 void Advance();
268 void Advance(int dist); 270 void Advance(int dist);
269 // Pushes a read character (or potentially some other character) back 271 // Pushes a read character (or potentially some other character) back
270 // on the input stream. After pushing it back, it becomes the character 272 // on the input stream. After pushing it back, it becomes the character
271 // returned by current(). There is a limited amount of push-back buffer. 273 // returned by current(). There is a limited amount of push-back buffer.
272 // A function using PushBack should check that it doesn't push back more 274 // A function using PushBack should check that it doesn't push back more
273 // than kMaxPushback characters, and it should not push back more characters 275 // than kMaxPushback characters, and it should not push back more characters
274 // than it has read, or that it knows had been read prior to calling it. 276 // than it has read, or that it knows had been read prior to calling it.
275 void PushBack(uc32 character); 277 void PushBack(uc32 character);
(...skipping 3174 matching lines...) Expand 10 before | Expand all | Expand 10 after
3450 case '^': 3452 case '^':
3451 Advance(); 3453 Advance();
3452 return new RegExpAssertion( 3454 return new RegExpAssertion(
3453 multiline_mode_ ? RegExpAssertion::START_OF_LINE 3455 multiline_mode_ ? RegExpAssertion::START_OF_LINE
3454 : RegExpAssertion::START_OF_INPUT); 3456 : RegExpAssertion::START_OF_INPUT);
3455 case '$': 3457 case '$':
3456 Advance(); 3458 Advance();
3457 return new RegExpAssertion( 3459 return new RegExpAssertion(
3458 multiline_mode_ ? RegExpAssertion::END_OF_LINE 3460 multiline_mode_ ? RegExpAssertion::END_OF_LINE
3459 : RegExpAssertion::END_OF_INPUT); 3461 : RegExpAssertion::END_OF_INPUT);
3460 case '.': 3462 case '.': {
3461 Advance(); 3463 Advance();
3462 atom = new RegExpCharacterClass(CharacterRange::CharacterClass('.')); 3464 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2);
3465 CharacterRange::AddClassEscape('.', ranges);
3466 atom = new RegExpCharacterClass(ranges, false);
3463 break; 3467 break;
3468 }
3464 case '(': 3469 case '(':
3465 atom = ParseGroup(CHECK_OK); 3470 atom = ParseGroup(CHECK_OK);
3466 break; 3471 break;
3467 case '[': 3472 case '[':
3468 atom = ParseCharacterClass(CHECK_OK); 3473 atom = ParseCharacterClass(CHECK_OK);
3469 break; 3474 break;
3470 // Atom :: 3475 // Atom ::
3471 // \ AtomEscape 3476 // \ AtomEscape
3472 case '\\': 3477 case '\\':
3473 if (has_next()) { 3478 if (has_next()) {
3474 switch (next()) { 3479 switch (next()) {
3475 case 'b': 3480 case 'b':
3476 Advance(2); 3481 Advance(2);
3477 return new RegExpAssertion(RegExpAssertion::BOUNDARY); 3482 return new RegExpAssertion(RegExpAssertion::BOUNDARY);
3478 case 'B': 3483 case 'B':
3479 Advance(2); 3484 Advance(2);
3480 return new RegExpAssertion(RegExpAssertion::NON_BOUNDARY); 3485 return new RegExpAssertion(RegExpAssertion::NON_BOUNDARY);
3481 // AtomEscape :: 3486 // AtomEscape ::
3482 // CharacterClassEscape 3487 // CharacterClassEscape
3483 // 3488 //
3484 // CharacterClassEscape :: one of 3489 // CharacterClassEscape :: one of
3485 // d D s S w W 3490 // d D s S w W
3486 case 'd': case 'D': case 's': case 'S': case 'w': case 'W': { 3491 case 'd': case 'D': case 's': case 'S': case 'w': case 'W': {
3487 uc32 c = next(); 3492 uc32 c = next();
3493 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2);
3494 CharacterRange::AddClassEscape(c, ranges);
3488 Advance(2); 3495 Advance(2);
3489 atom = new RegExpCharacterClass(CharacterRange::CharacterClass(c)); 3496 atom = new RegExpCharacterClass(ranges, false);
3490 goto has_read_atom; 3497 goto has_read_atom;
3491 } 3498 }
3492 case '1': case '2': case '3': case '4': case '5': case '6': 3499 case '1': case '2': case '3': case '4': case '5': case '6':
3493 case '7': case '8': case '9': { 3500 case '7': case '8': case '9': {
3494 int index = 0; 3501 int index = 0;
3495 if (ParseBackreferenceIndex(&index)) { 3502 if (ParseBackreferenceIndex(&index)) {
3496 atom = new RegExpBackreference(index); 3503 atom = new RegExpBackreference(index);
3497 goto has_read_atom; 3504 goto has_read_atom;
3498 } else { 3505 } else {
3499 // If this is not a backreference we go to the atom parser 3506 // If this is not a backreference we go to the atom parser
(...skipping 287 matching lines...) Expand 10 before | Expand all | Expand 10 after
3787 } else if (type == ':') { 3794 } else if (type == ':') {
3788 return body; 3795 return body;
3789 } else { 3796 } else {
3790 ASSERT(type == '=' || type == '!'); 3797 ASSERT(type == '=' || type == '!');
3791 bool is_positive = (type == '='); 3798 bool is_positive = (type == '=');
3792 return new RegExpLookahead(body, is_positive); 3799 return new RegExpLookahead(body, is_positive);
3793 } 3800 }
3794 } 3801 }
3795 3802
3796 3803
3797 CharacterRange RegExpParser::ParseClassAtom(bool* ok) { 3804 CharacterRange RegExpParser::ParseClassAtom(bool* is_char_class,
3805 ZoneList<CharacterRange>* ranges,
3806 bool* ok) {
3807 ASSERT_EQ(false, *is_char_class);
3798 uc32 first = current(); 3808 uc32 first = current();
3799 if (first == '\\') { 3809 if (first == '\\') {
3800 switch (next()) { 3810 switch (next()) {
3801 case 'b': 3811 case 'b':
3802 Advance(2); 3812 Advance(2);
3803 return CharacterRange::Singleton('\b'); 3813 return CharacterRange::Singleton('\b');
3804 case 'w': case 'W': case 'd': case 'D': case 's': case 'S': { 3814 case 'w': case 'W': case 'd': case 'D': case 's': case 'S': {
3815 *is_char_class = true;
3805 uc32 c = next(); 3816 uc32 c = next();
3817 CharacterRange::AddClassEscape(c, ranges);
3806 Advance(2); 3818 Advance(2);
3807 return CharacterRange::CharacterClass(c); 3819 return NULL;
3808 } 3820 }
3809 default: 3821 default:
3810 uc32 c = ParseCharacterEscape(CHECK_OK); 3822 uc32 c = ParseCharacterEscape(CHECK_OK);
3811 return CharacterRange::Singleton(c); 3823 return CharacterRange::Singleton(c);
3812 } 3824 }
3813 } else { 3825 } else {
3814 Advance(); 3826 Advance();
3815 return CharacterRange::Singleton(first); 3827 return CharacterRange::Singleton(first);
3816 } 3828 }
3817 } 3829 }
(...skipping 10 matching lines...) Expand all
3828 if (current() == '^') { 3840 if (current() == '^') {
3829 is_negated = true; 3841 is_negated = true;
3830 Advance(); 3842 Advance();
3831 } 3843 }
3832 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2); 3844 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2);
3833 while (has_more() && current() != ']') { 3845 while (has_more() && current() != ']') {
3834 if (current() == '-') { 3846 if (current() == '-') {
3835 Advance(); 3847 Advance();
3836 ranges->Add(CharacterRange::Singleton('-')); 3848 ranges->Add(CharacterRange::Singleton('-'));
3837 } else { 3849 } else {
3838 CharacterRange first = ParseClassAtom(CHECK_OK); 3850 bool is_char_class = false;
3839 if (!first.is_character_class() && current() == '-') { 3851 CharacterRange first = ParseClassAtom(&is_char_class, ranges, CHECK_OK);
3840 Advance(); 3852 if (!is_char_class) {
3841 CharacterRange next = ParseClassAtom(CHECK_OK); 3853 if (current() == '-') {
3842 if (next.is_character_class()) { 3854 Advance();
3843 return ReportError(CStrVector(kIllegal), CHECK_OK); 3855 CharacterRange next = ParseClassAtom(&is_char_class, ranges, CHECK_OK) ;
3856 if (is_char_class) {
3857 return ReportError(CStrVector(kIllegal), CHECK_OK);
3858 }
3859 if (first.from() > next.to()) {
3860 return ReportError(CStrVector(kRangeOutOfOrder), CHECK_OK);
3861 }
3862 ranges->Add(CharacterRange::Range(first.from(), next.to()));
3863 } else {
3864 ranges->Add(first);
3844 } 3865 }
3845 if (first.from() > next.to()) {
3846 return ReportError(CStrVector(kRangeOutOfOrder), CHECK_OK);
3847 }
3848 ranges->Add(CharacterRange::Range(first.from(), next.to()));
3849 } else {
3850 ranges->Add(first);
3851 } 3866 }
3852 } 3867 }
3853 } 3868 }
3854 if (!has_more()) { 3869 if (!has_more()) {
3855 return ReportError(CStrVector(kUnterminated), CHECK_OK); 3870 return ReportError(CStrVector(kUnterminated), CHECK_OK);
3856 } 3871 }
3857 Advance(); 3872 Advance();
3858 if (ranges->length() == 0) { 3873 if (ranges->length() == 0) {
3859 return RegExpEmpty::GetInstance(); 3874 return RegExpEmpty::GetInstance();
3860 } else { 3875 } else {
(...skipping 112 matching lines...) Expand 10 before | Expand all | Expand 10 after
3973 start_position, 3988 start_position,
3974 is_expression); 3989 is_expression);
3975 return result; 3990 return result;
3976 } 3991 }
3977 3992
3978 3993
3979 #undef NEW 3994 #undef NEW
3980 3995
3981 3996
3982 } } // namespace v8::internal 3997 } } // namespace v8::internal
OLDNEW
« src/jsregexp.cc ('K') | « src/list-inl.h ('k') | test/cctest/test-regexp.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698