src/parser.cc - Issue 9104: Dispatch tables

Side by Side Diff: src/parser.cc

Issue 9104: Dispatch tables (Closed)

Patch Set: Created 12 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved.	1 // Copyright 2006-2008 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 244 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
255	255

256 uc32 ParseControlEscape(bool* ok);	256 uc32 ParseControlEscape(bool* ok);

257 uc32 ParseOctalLiteral(bool* ok);	257 uc32 ParseOctalLiteral(bool* ok);

258	258

259 // Tries to parse the input as a backreference. If successful it	259 // Tries to parse the input as a backreference. If successful it

260 // stores the result in the output parameter and returns true. If	260 // stores the result in the output parameter and returns true. If

261 // it fails it will push back the characters read so the same characters	261 // it fails it will push back the characters read so the same characters

262 // can be reparsed.	262 // can be reparsed.

263 bool ParseBackreferenceIndex(int* index_out);	263 bool ParseBackreferenceIndex(int* index_out);

264	264

265 CharacterRange ParseClassAtom(bool* ok);	265 CharacterRange ParseClassAtom(bool* is_char_class,

	266 ZoneList<CharacterRange>* ranges,

	267 bool* ok);

266 RegExpTree* ReportError(Vector<const char> message, bool* ok);	268 RegExpTree* ReportError(Vector<const char> message, bool* ok);

267 void Advance();	269 void Advance();

268 void Advance(int dist);	270 void Advance(int dist);

269 // Pushes a read character (or potentially some other character) back	271 // Pushes a read character (or potentially some other character) back

270 // on the input stream. After pushing it back, it becomes the character	272 // on the input stream. After pushing it back, it becomes the character

271 // returned by current(). There is a limited amount of push-back buffer.	273 // returned by current(). There is a limited amount of push-back buffer.

272 // A function using PushBack should check that it doesn't push back more	274 // A function using PushBack should check that it doesn't push back more

273 // than kMaxPushback characters, and it should not push back more characters	275 // than kMaxPushback characters, and it should not push back more characters

274 // than it has read, or that it knows had been read prior to calling it.	276 // than it has read, or that it knows had been read prior to calling it.

275 void PushBack(uc32 character);	277 void PushBack(uc32 character);

(...skipping 3174 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
3450 case '^':	3452 case '^':

3451 Advance();	3453 Advance();

3452 return new RegExpAssertion(	3454 return new RegExpAssertion(

3453 multiline_mode_ ? RegExpAssertion::START_OF_LINE	3455 multiline_mode_ ? RegExpAssertion::START_OF_LINE

3454 : RegExpAssertion::START_OF_INPUT);	3456 : RegExpAssertion::START_OF_INPUT);

3455 case '$':	3457 case '$':

3456 Advance();	3458 Advance();

3457 return new RegExpAssertion(	3459 return new RegExpAssertion(

3458 multiline_mode_ ? RegExpAssertion::END_OF_LINE	3460 multiline_mode_ ? RegExpAssertion::END_OF_LINE

3459 : RegExpAssertion::END_OF_INPUT);	3461 : RegExpAssertion::END_OF_INPUT);

3460 case '.':	3462 case '.': {

3461 Advance();	3463 Advance();

3462 atom = new RegExpCharacterClass(CharacterRange::CharacterClass('.'));	3464 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2);

	3465 CharacterRange::AddClassEscape('.', ranges);

	3466 atom = new RegExpCharacterClass(ranges, false);

3463 break;	3467 break;

	3468 }

3464 case '(':	3469 case '(':

3465 atom = ParseGroup(CHECK_OK);	3470 atom = ParseGroup(CHECK_OK);

3466 break;	3471 break;

3467 case '[':	3472 case '[':

3468 atom = ParseCharacterClass(CHECK_OK);	3473 atom = ParseCharacterClass(CHECK_OK);

3469 break;	3474 break;

3470 // Atom ::	3475 // Atom ::

3471 // \ AtomEscape	3476 // \ AtomEscape

3472 case '\\':	3477 case '\\':

3473 if (has_next()) {	3478 if (has_next()) {

3474 switch (next()) {	3479 switch (next()) {

3475 case 'b':	3480 case 'b':

3476 Advance(2);	3481 Advance(2);

3477 return new RegExpAssertion(RegExpAssertion::BOUNDARY);	3482 return new RegExpAssertion(RegExpAssertion::BOUNDARY);

3478 case 'B':	3483 case 'B':

3479 Advance(2);	3484 Advance(2);

3480 return new RegExpAssertion(RegExpAssertion::NON_BOUNDARY);	3485 return new RegExpAssertion(RegExpAssertion::NON_BOUNDARY);

3481 // AtomEscape ::	3486 // AtomEscape ::

3482 // CharacterClassEscape	3487 // CharacterClassEscape

3483 //	3488 //

3484 // CharacterClassEscape :: one of	3489 // CharacterClassEscape :: one of

3485 // d D s S w W	3490 // d D s S w W

3486 case 'd': case 'D': case 's': case 'S': case 'w': case 'W': {	3491 case 'd': case 'D': case 's': case 'S': case 'w': case 'W': {

3487 uc32 c = next();	3492 uc32 c = next();

	3493 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2);

	3494 CharacterRange::AddClassEscape(c, ranges);

3488 Advance(2);	3495 Advance(2);

3489 atom = new RegExpCharacterClass(CharacterRange::CharacterClass(c));	3496 atom = new RegExpCharacterClass(ranges, false);

3490 goto has_read_atom;	3497 goto has_read_atom;

3491 }	3498 }

3492 case '1': case '2': case '3': case '4': case '5': case '6':	3499 case '1': case '2': case '3': case '4': case '5': case '6':

3493 case '7': case '8': case '9': {	3500 case '7': case '8': case '9': {

3494 int index = 0;	3501 int index = 0;

3495 if (ParseBackreferenceIndex(&index)) {	3502 if (ParseBackreferenceIndex(&index)) {

3496 atom = new RegExpBackreference(index);	3503 atom = new RegExpBackreference(index);

3497 goto has_read_atom;	3504 goto has_read_atom;

3498 } else {	3505 } else {

3499 // If this is not a backreference we go to the atom parser	3506 // If this is not a backreference we go to the atom parser

(...skipping 287 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
3787 } else if (type == ':') {	3794 } else if (type == ':') {

3788 return body;	3795 return body;

3789 } else {	3796 } else {

3790 ASSERT(type == '=' || type == '!');	3797 ASSERT(type == '=' || type == '!');

3791 bool is_positive = (type == '=');	3798 bool is_positive = (type == '=');

3792 return new RegExpLookahead(body, is_positive);	3799 return new RegExpLookahead(body, is_positive);

3793 }	3800 }

3794 }	3801 }

3795	3802

3796	3803

3797 CharacterRange RegExpParser::ParseClassAtom(bool* ok) {	3804 CharacterRange RegExpParser::ParseClassAtom(bool* is_char_class,

	3805 ZoneList<CharacterRange>* ranges,

	3806 bool* ok) {

	3807 ASSERT_EQ(false, *is_char_class);

3798 uc32 first = current();	3808 uc32 first = current();

3799 if (first == '\\') {	3809 if (first == '\\') {

3800 switch (next()) {	3810 switch (next()) {

3801 case 'b':	3811 case 'b':

3802 Advance(2);	3812 Advance(2);

3803 return CharacterRange::Singleton('\b');	3813 return CharacterRange::Singleton('\b');

3804 case 'w': case 'W': case 'd': case 'D': case 's': case 'S': {	3814 case 'w': case 'W': case 'd': case 'D': case 's': case 'S': {

	3815 *is_char_class = true;

3805 uc32 c = next();	3816 uc32 c = next();

	3817 CharacterRange::AddClassEscape(c, ranges);

3806 Advance(2);	3818 Advance(2);

3807 return CharacterRange::CharacterClass(c);	3819 return NULL;

3808 }	3820 }

3809 default:	3821 default:

3810 uc32 c = ParseCharacterEscape(CHECK_OK);	3822 uc32 c = ParseCharacterEscape(CHECK_OK);

3811 return CharacterRange::Singleton(c);	3823 return CharacterRange::Singleton(c);

3812 }	3824 }

3813 } else {	3825 } else {

3814 Advance();	3826 Advance();

3815 return CharacterRange::Singleton(first);	3827 return CharacterRange::Singleton(first);

3816 }	3828 }

3817 }	3829 }

(...skipping 10 matching lines...) Expand all Loading...
3828 if (current() == '^') {	3840 if (current() == '^') {

3829 is_negated = true;	3841 is_negated = true;

3830 Advance();	3842 Advance();

3831 }	3843 }

3832 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2);	3844 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2);

3833 while (has_more() && current() != ']') {	3845 while (has_more() && current() != ']') {

3834 if (current() == '-') {	3846 if (current() == '-') {

3835 Advance();	3847 Advance();

3836 ranges->Add(CharacterRange::Singleton('-'));	3848 ranges->Add(CharacterRange::Singleton('-'));

3837 } else {	3849 } else {

3838 CharacterRange first = ParseClassAtom(CHECK_OK);	3850 bool is_char_class = false;

3839 if (!first.is_character_class() && current() == '-') {	3851 CharacterRange first = ParseClassAtom(&is_char_class, ranges, CHECK_OK);

3840 Advance();	3852 if (!is_char_class) {

3841 CharacterRange next = ParseClassAtom(CHECK_OK);	3853 if (current() == '-') {

3842 if (next.is_character_class()) {	3854 Advance();

3843 return ReportError(CStrVector(kIllegal), CHECK_OK);	3855 CharacterRange next = ParseClassAtom(&is_char_class, ranges, CHECK_OK);

	3856 if (is_char_class) {

	3857 return ReportError(CStrVector(kIllegal), CHECK_OK);

	3858 }

	3859 if (first.from() > next.to()) {

	3860 return ReportError(CStrVector(kRangeOutOfOrder), CHECK_OK);

	3861 }

	3862 ranges->Add(CharacterRange::Range(first.from(), next.to()));

	3863 } else {

	3864 ranges->Add(first);

3844 }	3865 }

3845 if (first.from() > next.to()) {

3846 return ReportError(CStrVector(kRangeOutOfOrder), CHECK_OK);

3847 }

3848 ranges->Add(CharacterRange::Range(first.from(), next.to()));

3849 } else {

3850 ranges->Add(first);

3851 }	3866 }

3852 }	3867 }

3853 }	3868 }

3854 if (!has_more()) {	3869 if (!has_more()) {

3855 return ReportError(CStrVector(kUnterminated), CHECK_OK);	3870 return ReportError(CStrVector(kUnterminated), CHECK_OK);

3856 }	3871 }

3857 Advance();	3872 Advance();

3858 if (ranges->length() == 0) {	3873 if (ranges->length() == 0) {

3859 return RegExpEmpty::GetInstance();	3874 return RegExpEmpty::GetInstance();

3860 } else {	3875 } else {

(...skipping 112 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
3973 start_position,	3988 start_position,

3974 is_expression);	3989 is_expression);

3975 return result;	3990 return result;

3976 }	3991 }

3977	3992

3978	3993

3979 #undef NEW	3994 #undef NEW

3980	3995

3981	3996

3982 } } // namespace v8::internal	3997 } } // namespace v8::internal

OLD	NEW

« src/jsregexp.cc ('K') | « src/list-inl.h ('k') | test/cctest/test-regexp.cc » ('j') | no next file with comments »