src/parser.cc - Issue 11473: Bugfixes

Side by Side Diff: src/parser.cc

Issue 11473: Bugfixes (Closed)

Patch Set: Created 12 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved.	1 // Copyright 2006-2008 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 302 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
313 // following quantifier	313 // following quantifier

314 void AddEmpty();	314 void AddEmpty();

315 void AddAtom(RegExpTree* tree);	315 void AddAtom(RegExpTree* tree);

316 void AddAssertion(RegExpTree* tree);	316 void AddAssertion(RegExpTree* tree);

317 void NewAlternative(); // '\|'	317 void NewAlternative(); // '\|'

318 void AddQuantifierToAtom(int min, int max, bool is_greedy);	318 void AddQuantifierToAtom(int min, int max, bool is_greedy);

319 RegExpTree* ToRegExp();	319 RegExpTree* ToRegExp();

320 private:	320 private:

321 void FlushCharacters();	321 void FlushCharacters();

322 void FlushText();	322 void FlushText();

323 bool FlushTerms();	323 void FlushTerms();

324 bool pending_empty_;	324 bool pending_empty_;

325 ZoneList<uc16>* characters_;	325 ZoneList<uc16>* characters_;

326 BufferedZoneList<RegExpTree, 2> terms_;	326 BufferedZoneList<RegExpTree, 2> terms_;

327 BufferedZoneList<RegExpTree, 2> text_;	327 BufferedZoneList<RegExpTree, 2> text_;

328 BufferedZoneList<RegExpTree, 2> alternatives_;	328 BufferedZoneList<RegExpTree, 2> alternatives_;

329 #ifdef DEBUG	329 #ifdef DEBUG

330 enum {ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM} last_added_;	330 enum {ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM} last_added_;

331 #define LAST(x) last_added_ = x;	331 #define LAST(x) last_added_ = x;

332 #else	332 #else

333 #define LAST(x)	333 #define LAST(x)

(...skipping 69 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
403	403

404	404

405 void RegExpBuilder::AddAssertion(RegExpTree* assert) {	405 void RegExpBuilder::AddAssertion(RegExpTree* assert) {

406 FlushText();	406 FlushText();

407 terms_.Add(assert);	407 terms_.Add(assert);

408 LAST(ADD_ASSERT);	408 LAST(ADD_ASSERT);

409 }	409 }

410	410

411	411

412 void RegExpBuilder::NewAlternative() {	412 void RegExpBuilder::NewAlternative() {

413 if (!FlushTerms()) {	413 FlushTerms();

414 alternatives_.Add(RegExpEmpty::GetInstance());

415 }

416 }	414 }

417	415

418	416

419 bool RegExpBuilder::FlushTerms() {	417 void RegExpBuilder::FlushTerms() {

420 FlushText();	418 FlushText();

421 int num_terms = terms_.length();	419 int num_terms = terms_.length();

	420 RegExpTree* alternative;

422 if (num_terms == 0) {	421 if (num_terms == 0) {

423 return false;	422 alternative = RegExpEmpty::GetInstance();

424 }	423 } else if (num_terms == 1) {

425 RegExpTree* alternative;

426 if (num_terms == 1) {

427 alternative = terms_.last();	424 alternative = terms_.last();

428 } else {	425 } else {

429 alternative = new RegExpAlternative(terms_.GetList());	426 alternative = new RegExpAlternative(terms_.GetList());

430 }	427 }

431 alternatives_.Add(alternative);	428 alternatives_.Add(alternative);

432 terms_.Clear();	429 terms_.Clear();

433 LAST(ADD_NONE);	430 LAST(ADD_NONE);

434 return true;

435 }	431 }

436	432

437	433

438 RegExpTree* RegExpBuilder::ToRegExp() {	434 RegExpTree* RegExpBuilder::ToRegExp() {

439 FlushTerms();	435 FlushTerms();

440 int num_alternatives = alternatives_.length();	436 int num_alternatives = alternatives_.length();

441 if (num_alternatives == 0) {	437 if (num_alternatives == 0) {

442 return RegExpEmpty::GetInstance();	438 return RegExpEmpty::GetInstance();

443 }	439 }

444 if (num_alternatives == 1) {	440 if (num_alternatives == 1) {

(...skipping 54 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
499 RegExpParser(FlatStringReader* in,	495 RegExpParser(FlatStringReader* in,

500 Handle<String>* error,	496 Handle<String>* error,

501 bool multiline_mode);	497 bool multiline_mode);

502 RegExpTree* ParsePattern(bool* ok);	498 RegExpTree* ParsePattern(bool* ok);

503 RegExpTree* ParseDisjunction(bool* ok);	499 RegExpTree* ParseDisjunction(bool* ok);

504 RegExpTree* ParseGroup(bool* ok);	500 RegExpTree* ParseGroup(bool* ok);

505 RegExpTree* ParseCharacterClass(bool* ok);	501 RegExpTree* ParseCharacterClass(bool* ok);

506	502

507 // Parses a {...,...} quantifier and stores the range in the given	503 // Parses a {...,...} quantifier and stores the range in the given

508 // out parameters.	504 // out parameters.

509 void* ParseIntervalQuantifier(int* min_out, int* max_out, bool* ok);	505 bool ParseIntervalQuantifier(int* min_out, int* max_out);

510	506

511 // Parses and returns a single escaped character. The character	507 // Parses and returns a single escaped character. The character

512 // must not be 'b' or 'B' since they are usually handle specially.	508 // must not be 'b' or 'B' since they are usually handle specially.

513 uc32 ParseClassCharacterEscape(bool* ok);	509 uc32 ParseClassCharacterEscape(bool* ok);

514	510

515 // Checks whether the following is a length-digit hexadecimal number,	511 // Checks whether the following is a length-digit hexadecimal number,

516 // and sets the value if it is.	512 // and sets the value if it is.

517 bool ParseHexEscape(int length, uc32* value);	513 bool ParseHexEscape(int length, uc32* value);

518	514

519 uc32 ParseControlLetterEscape(bool* ok);	515 uc32 ParseControlLetterEscape(bool* ok);

(...skipping 11 matching lines...) Expand all Loading...
531 RegExpTree* ReportError(Vector<const char> message, bool* ok);	527 RegExpTree* ReportError(Vector<const char> message, bool* ok);

532 void Advance();	528 void Advance();

533 void Advance(int dist);	529 void Advance(int dist);

534 void Reset(int pos);	530 void Reset(int pos);

535	531

536 bool HasCharacterEscapes();	532 bool HasCharacterEscapes();

537	533

538 int captures_started() { return captures_ == NULL ? 0 : captures_->length(); }	534 int captures_started() { return captures_ == NULL ? 0 : captures_->length(); }

539 int position() { return next_pos_ - 1; }	535 int position() { return next_pos_ - 1; }

540	536

541 static const uc32 kEndMarker = unibrow::Utf8::kBadChar;	537 static const uc32 kEndMarker = (1 << 21);

542 private:	538 private:

543 uc32 current() { return current_; }	539 uc32 current() { return current_; }

544 bool has_more() { return has_more_; }	540 bool has_more() { return has_more_; }

545 bool has_next() { return next_pos_ < in()->length(); }	541 bool has_next() { return next_pos_ < in()->length(); }

546 uc32 Next();	542 uc32 Next();

547 FlatStringReader* in() { return in_; }	543 FlatStringReader* in() { return in_; }

548 uc32 current_;	544 uc32 current_;

549 bool has_more_;	545 bool has_more_;

550 bool multiline_mode_;	546 bool multiline_mode_;

551 int next_pos_;	547 int next_pos_;

(...skipping 3043 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3595 if (capture->available() == CAPTURE_AVAILABLE) {	3591 if (capture->available() == CAPTURE_AVAILABLE) {

3596 capture->set_available(CAPTURE_UNREACHABLE);	3592 capture->set_available(CAPTURE_UNREACHABLE);

3597 }	3593 }

3598 }	3594 }

3599 capture_start_index = capture_new_alt_start_index;	3595 capture_start_index = capture_new_alt_start_index;

3600 continue;	3596 continue;

3601 }	3597 }

3602 case '*':	3598 case '*':

3603 case '+':	3599 case '+':

3604 case '?':	3600 case '?':

3605 case '{':	3601 ReportError(CStrVector("Nothing to repeat"), CHECK_OK);

3606 ReportError(CStrVector("Nothing to repeat."), CHECK_OK);

3607 case '^': {	3602 case '^': {

3608 Advance();	3603 Advance();

3609 RegExpAssertion::Type type =	3604 RegExpAssertion::Type type =

3610 multiline_mode_ ? RegExpAssertion::START_OF_LINE :	3605 multiline_mode_ ? RegExpAssertion::START_OF_LINE :

3611 RegExpAssertion::START_OF_INPUT;	3606 RegExpAssertion::START_OF_INPUT;

3612 builder.AddAssertion(new RegExpAssertion(type));	3607 builder.AddAssertion(new RegExpAssertion(type));

3613 continue;	3608 continue;

3614 }	3609 }

3615 case '$': {	3610 case '$': {

3616 Advance();	3611 Advance();

(...skipping 130 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3747 break;	3742 break;

3748 }	3743 }

3749 default:	3744 default:

3750 // Identity escape.	3745 // Identity escape.

3751 builder.AddCharacter(Next());	3746 builder.AddCharacter(Next());

3752 Advance(2);	3747 Advance(2);

3753 break;	3748 break;

3754 }	3749 }

3755 has_character_escapes_ = true;	3750 has_character_escapes_ = true;

3756 break;	3751 break;

	3752 case '{': {

	3753 int dummy;

	3754 if (ParseIntervalQuantifier(&dummy, &dummy)) {

	3755 ReportError(CStrVector("Nothing to repeat"), CHECK_OK);

	3756 }

	3757 // fallthrough

	3758 }

3757 default:	3759 default:

3758 builder.AddCharacter(current());	3760 builder.AddCharacter(current());

3759 Advance();	3761 Advance();

3760 break;	3762 break;

3761 } // end switch(current())	3763 } // end switch(current())

3762	3764

3763 has_read_atom:	3765 has_read_atom:

3764 int min;	3766 int min;

3765 int max;	3767 int max;

3766 switch (current()) {	3768 switch (current()) {

3767 // QuantifierPrefix ::	3769 // QuantifierPrefix ::

3768 // *	3770 // *

3769 // +	3771 // +

3770 // ?	3772 // ?

3771 // {	3773 // {

3772 case '*':	3774 case '*':

3773 min = 0;	3775 min = 0;

3774 max = RegExpQuantifier::kInfinity;	3776 max = RegExpQuantifier::kInfinity;

3775 Advance();	3777 Advance();

3776 break;	3778 break;

3777 case '+':	3779 case '+':

3778 min = 1;	3780 min = 1;

3779 max = RegExpQuantifier::kInfinity;	3781 max = RegExpQuantifier::kInfinity;

3780 Advance();	3782 Advance();

3781 break;	3783 break;

3782 case '?':	3784 case '?':

3783 min = 0;	3785 min = 0;

3784 max = 1;	3786 max = 1;

3785 Advance();	3787 Advance();

3786 break;	3788 break;

3787 case '{':	3789 case '{':

3788 ParseIntervalQuantifier(&min, &max, CHECK_OK);	3790 if (ParseIntervalQuantifier(&min, &max)) {

3789 break;	3791 break;

	3792 } else {

	3793 continue;

	3794 }

3790 default:	3795 default:

3791 continue;	3796 continue;

3792 }	3797 }

3793 bool is_greedy = true;	3798 bool is_greedy = true;

3794 if (current() == '?') {	3799 if (current() == '?') {

3795 is_greedy = false;	3800 is_greedy = false;

3796 Advance();	3801 Advance();

3797 }	3802 }

3798 builder.AddQuantifierToAtom(min, max, is_greedy);	3803 builder.AddQuantifierToAtom(min, max, is_greedy);

3799 }	3804 }

(...skipping 69 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3869 }	3874 }

3870 *index_out = value;	3875 *index_out = value;

3871 return true;	3876 return true;

3872 }	3877 }

3873	3878

3874	3879

3875 // QuantifierPrefix ::	3880 // QuantifierPrefix ::

3876 // { DecimalDigits }	3881 // { DecimalDigits }

3877 // { DecimalDigits , }	3882 // { DecimalDigits , }

3878 // { DecimalDigits , DecimalDigits }	3883 // { DecimalDigits , DecimalDigits }

3879 void* RegExpParser::ParseIntervalQuantifier(int* min_out,	3884 bool RegExpParser::ParseIntervalQuantifier(int* min_out, int* max_out) {

3880 int* max_out,

3881 bool* ok) {

3882 ASSERT_EQ(current(), '{');	3885 ASSERT_EQ(current(), '{');

3883 static const char* kInvalidQuantifier = "Invalid quantifier";	3886 int start = position();

3884 Advance();	3887 Advance();

3885 int min = 0;	3888 int min = 0;

3886 if (!IsDecimalDigit(current())) {	3889 if (!IsDecimalDigit(current())) {

3887 // JSC allows {} and {,} as quantifiers (and { and } and all	3890 Reset(start);

3888 // sorts of crazy stuff) but my puny human brain has been unable	3891 return false;

3889 // to figure out what they mean exactly, if anything. For now

3890 // we follow the spec and report a syntax error.

3891 ReportError(CStrVector(kInvalidQuantifier), CHECK_OK);

3892 }	3892 }

3893 while (IsDecimalDigit(current())) {	3893 while (IsDecimalDigit(current())) {

3894 min = 10 * min + (current() - '0');	3894 min = 10 * min + (current() - '0');

3895 Advance();	3895 Advance();

3896 }	3896 }

3897 int max = 0;	3897 int max = 0;

3898 if (current() == '}') {	3898 if (current() == '}') {

3899 max = min;	3899 max = min;

3900 Advance();	3900 Advance();

3901 } else if (current() == ',') {	3901 } else if (current() == ',') {

3902 Advance();	3902 Advance();

3903 if (current() == '}') {	3903 if (current() == '}') {

3904 max = RegExpQuantifier::kInfinity;	3904 max = RegExpQuantifier::kInfinity;

3905 Advance();	3905 Advance();

3906 } else {	3906 } else {

3907 while (IsDecimalDigit(current())) {	3907 while (IsDecimalDigit(current())) {

3908 max = 10 * max + (current() - '0');	3908 max = 10 * max + (current() - '0');

3909 Advance();	3909 Advance();

3910 }	3910 }

3911 if (current() != '}') {	3911 if (current() != '}') {

3912 ReportError(CStrVector(kInvalidQuantifier), CHECK_OK);	3912 Reset(start);

	3913 return false;

3913 }	3914 }

3914 Advance();	3915 Advance();

3915 }	3916 }

3916 } else {	3917 } else {

3917 ReportError(CStrVector(kInvalidQuantifier), CHECK_OK);	3918 Reset(start);

	3919 return false;

3918 }	3920 }

3919 *min_out = min;	3921 *min_out = min;

3920 *max_out = max;	3922 *max_out = max;

3921 return NULL;	3923 return true;

3922 }	3924 }

3923	3925

3924	3926

3925 // Upper and lower case letters differ by one bit.	3927 // Upper and lower case letters differ by one bit.

3926 STATIC_CHECK(('a' ^ 'A') == 0x20);	3928 STATIC_CHECK(('a' ^ 'A') == 0x20);

3927	3929

3928 uc32 RegExpParser::ParseControlLetterEscape(bool* ok) {	3930 uc32 RegExpParser::ParseControlLetterEscape(bool* ok) {

3929 if (!has_more()) {	3931 if (!has_more()) {

3930 ReportError(CStrVector("\\c at end of pattern"), ok);	3932 ReportError(CStrVector("\\c at end of pattern"), ok);

3931 return '\0';	3933 return '\0';

(...skipping 213 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4145 is_negated = true;	4147 is_negated = true;

4146 Advance();	4148 Advance();

4147 }	4149 }

4148 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2);	4150 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2);

4149 while (has_more() && current() != ']') {	4151 while (has_more() && current() != ']') {

4150 bool is_char_class = false;	4152 bool is_char_class = false;

4151 CharacterRange first = ParseClassAtom(&is_char_class, ranges, CHECK_OK);	4153 CharacterRange first = ParseClassAtom(&is_char_class, ranges, CHECK_OK);

4152 if (!is_char_class) {	4154 if (!is_char_class) {

4153 if (current() == '-') {	4155 if (current() == '-') {

4154 Advance();	4156 Advance();

4155 if (current() == ']') {	4157 if (current() == kEndMarker) {

	4158 // If we reach the end we break out of the loop and let the

	4159 // following code report an error.

	4160 break;

	4161 } else if (current() == ']') {

4156 ranges->Add(first);	4162 ranges->Add(first);

4157 ranges->Add(CharacterRange::Singleton('-'));	4163 ranges->Add(CharacterRange::Singleton('-'));

4158 break;	4164 break;

4159 }	4165 }

4160 CharacterRange next =	4166 CharacterRange next =

4161 ParseClassAtom(&is_char_class, ranges, CHECK_OK);	4167 ParseClassAtom(&is_char_class, ranges, CHECK_OK);

4162 if (is_char_class) {	4168 if (is_char_class) {

4163 return ReportError(CStrVector(kIllegal), CHECK_OK);	4169 return ReportError(CStrVector(kIllegal), CHECK_OK);

4164 }	4170 }

4165 if (first.from() > next.to()) {	4171 if (first.from() > next.to()) {

(...skipping 131 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4297 start_position,	4303 start_position,

4298 is_expression);	4304 is_expression);

4299 return result;	4305 return result;

4300 }	4306 }

4301	4307

4302	4308

4303 #undef NEW	4309 #undef NEW

4304	4310

4305	4311

4306 } } // namespace v8::internal	4312 } } // namespace v8::internal

OLD	NEW

« no previous file with comments | « src/jsregexp.cc ('k') | test/cctest/test-regexp.cc » ('j') | no next file with comments »