Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(206)

Side by Side Diff: src/parser.cc

Issue 11473: Bugfixes (Closed)
Patch Set: Created 12 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/jsregexp.cc ('k') | test/cctest/test-regexp.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. 1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 302 matching lines...) Expand 10 before | Expand all | Expand 10 after
313 // following quantifier 313 // following quantifier
314 void AddEmpty(); 314 void AddEmpty();
315 void AddAtom(RegExpTree* tree); 315 void AddAtom(RegExpTree* tree);
316 void AddAssertion(RegExpTree* tree); 316 void AddAssertion(RegExpTree* tree);
317 void NewAlternative(); // '|' 317 void NewAlternative(); // '|'
318 void AddQuantifierToAtom(int min, int max, bool is_greedy); 318 void AddQuantifierToAtom(int min, int max, bool is_greedy);
319 RegExpTree* ToRegExp(); 319 RegExpTree* ToRegExp();
320 private: 320 private:
321 void FlushCharacters(); 321 void FlushCharacters();
322 void FlushText(); 322 void FlushText();
323 bool FlushTerms(); 323 void FlushTerms();
324 bool pending_empty_; 324 bool pending_empty_;
325 ZoneList<uc16>* characters_; 325 ZoneList<uc16>* characters_;
326 BufferedZoneList<RegExpTree, 2> terms_; 326 BufferedZoneList<RegExpTree, 2> terms_;
327 BufferedZoneList<RegExpTree, 2> text_; 327 BufferedZoneList<RegExpTree, 2> text_;
328 BufferedZoneList<RegExpTree, 2> alternatives_; 328 BufferedZoneList<RegExpTree, 2> alternatives_;
329 #ifdef DEBUG 329 #ifdef DEBUG
330 enum {ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM} last_added_; 330 enum {ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM} last_added_;
331 #define LAST(x) last_added_ = x; 331 #define LAST(x) last_added_ = x;
332 #else 332 #else
333 #define LAST(x) 333 #define LAST(x)
(...skipping 69 matching lines...) Expand 10 before | Expand all | Expand 10 after
403 403
404 404
405 void RegExpBuilder::AddAssertion(RegExpTree* assert) { 405 void RegExpBuilder::AddAssertion(RegExpTree* assert) {
406 FlushText(); 406 FlushText();
407 terms_.Add(assert); 407 terms_.Add(assert);
408 LAST(ADD_ASSERT); 408 LAST(ADD_ASSERT);
409 } 409 }
410 410
411 411
412 void RegExpBuilder::NewAlternative() { 412 void RegExpBuilder::NewAlternative() {
413 if (!FlushTerms()) { 413 FlushTerms();
414 alternatives_.Add(RegExpEmpty::GetInstance());
415 }
416 } 414 }
417 415
418 416
419 bool RegExpBuilder::FlushTerms() { 417 void RegExpBuilder::FlushTerms() {
420 FlushText(); 418 FlushText();
421 int num_terms = terms_.length(); 419 int num_terms = terms_.length();
420 RegExpTree* alternative;
422 if (num_terms == 0) { 421 if (num_terms == 0) {
423 return false; 422 alternative = RegExpEmpty::GetInstance();
424 } 423 } else if (num_terms == 1) {
425 RegExpTree* alternative;
426 if (num_terms == 1) {
427 alternative = terms_.last(); 424 alternative = terms_.last();
428 } else { 425 } else {
429 alternative = new RegExpAlternative(terms_.GetList()); 426 alternative = new RegExpAlternative(terms_.GetList());
430 } 427 }
431 alternatives_.Add(alternative); 428 alternatives_.Add(alternative);
432 terms_.Clear(); 429 terms_.Clear();
433 LAST(ADD_NONE); 430 LAST(ADD_NONE);
434 return true;
435 } 431 }
436 432
437 433
438 RegExpTree* RegExpBuilder::ToRegExp() { 434 RegExpTree* RegExpBuilder::ToRegExp() {
439 FlushTerms(); 435 FlushTerms();
440 int num_alternatives = alternatives_.length(); 436 int num_alternatives = alternatives_.length();
441 if (num_alternatives == 0) { 437 if (num_alternatives == 0) {
442 return RegExpEmpty::GetInstance(); 438 return RegExpEmpty::GetInstance();
443 } 439 }
444 if (num_alternatives == 1) { 440 if (num_alternatives == 1) {
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after
499 RegExpParser(FlatStringReader* in, 495 RegExpParser(FlatStringReader* in,
500 Handle<String>* error, 496 Handle<String>* error,
501 bool multiline_mode); 497 bool multiline_mode);
502 RegExpTree* ParsePattern(bool* ok); 498 RegExpTree* ParsePattern(bool* ok);
503 RegExpTree* ParseDisjunction(bool* ok); 499 RegExpTree* ParseDisjunction(bool* ok);
504 RegExpTree* ParseGroup(bool* ok); 500 RegExpTree* ParseGroup(bool* ok);
505 RegExpTree* ParseCharacterClass(bool* ok); 501 RegExpTree* ParseCharacterClass(bool* ok);
506 502
507 // Parses a {...,...} quantifier and stores the range in the given 503 // Parses a {...,...} quantifier and stores the range in the given
508 // out parameters. 504 // out parameters.
509 void* ParseIntervalQuantifier(int* min_out, int* max_out, bool* ok); 505 bool ParseIntervalQuantifier(int* min_out, int* max_out);
510 506
511 // Parses and returns a single escaped character. The character 507 // Parses and returns a single escaped character. The character
512 // must not be 'b' or 'B' since they are usually handle specially. 508 // must not be 'b' or 'B' since they are usually handle specially.
513 uc32 ParseClassCharacterEscape(bool* ok); 509 uc32 ParseClassCharacterEscape(bool* ok);
514 510
515 // Checks whether the following is a length-digit hexadecimal number, 511 // Checks whether the following is a length-digit hexadecimal number,
516 // and sets the value if it is. 512 // and sets the value if it is.
517 bool ParseHexEscape(int length, uc32* value); 513 bool ParseHexEscape(int length, uc32* value);
518 514
519 uc32 ParseControlLetterEscape(bool* ok); 515 uc32 ParseControlLetterEscape(bool* ok);
(...skipping 11 matching lines...) Expand all
531 RegExpTree* ReportError(Vector<const char> message, bool* ok); 527 RegExpTree* ReportError(Vector<const char> message, bool* ok);
532 void Advance(); 528 void Advance();
533 void Advance(int dist); 529 void Advance(int dist);
534 void Reset(int pos); 530 void Reset(int pos);
535 531
536 bool HasCharacterEscapes(); 532 bool HasCharacterEscapes();
537 533
538 int captures_started() { return captures_ == NULL ? 0 : captures_->length(); } 534 int captures_started() { return captures_ == NULL ? 0 : captures_->length(); }
539 int position() { return next_pos_ - 1; } 535 int position() { return next_pos_ - 1; }
540 536
541 static const uc32 kEndMarker = unibrow::Utf8::kBadChar; 537 static const uc32 kEndMarker = (1 << 21);
542 private: 538 private:
543 uc32 current() { return current_; } 539 uc32 current() { return current_; }
544 bool has_more() { return has_more_; } 540 bool has_more() { return has_more_; }
545 bool has_next() { return next_pos_ < in()->length(); } 541 bool has_next() { return next_pos_ < in()->length(); }
546 uc32 Next(); 542 uc32 Next();
547 FlatStringReader* in() { return in_; } 543 FlatStringReader* in() { return in_; }
548 uc32 current_; 544 uc32 current_;
549 bool has_more_; 545 bool has_more_;
550 bool multiline_mode_; 546 bool multiline_mode_;
551 int next_pos_; 547 int next_pos_;
(...skipping 3043 matching lines...) Expand 10 before | Expand all | Expand 10 after
3595 if (capture->available() == CAPTURE_AVAILABLE) { 3591 if (capture->available() == CAPTURE_AVAILABLE) {
3596 capture->set_available(CAPTURE_UNREACHABLE); 3592 capture->set_available(CAPTURE_UNREACHABLE);
3597 } 3593 }
3598 } 3594 }
3599 capture_start_index = capture_new_alt_start_index; 3595 capture_start_index = capture_new_alt_start_index;
3600 continue; 3596 continue;
3601 } 3597 }
3602 case '*': 3598 case '*':
3603 case '+': 3599 case '+':
3604 case '?': 3600 case '?':
3605 case '{': 3601 ReportError(CStrVector("Nothing to repeat"), CHECK_OK);
3606 ReportError(CStrVector("Nothing to repeat."), CHECK_OK);
3607 case '^': { 3602 case '^': {
3608 Advance(); 3603 Advance();
3609 RegExpAssertion::Type type = 3604 RegExpAssertion::Type type =
3610 multiline_mode_ ? RegExpAssertion::START_OF_LINE : 3605 multiline_mode_ ? RegExpAssertion::START_OF_LINE :
3611 RegExpAssertion::START_OF_INPUT; 3606 RegExpAssertion::START_OF_INPUT;
3612 builder.AddAssertion(new RegExpAssertion(type)); 3607 builder.AddAssertion(new RegExpAssertion(type));
3613 continue; 3608 continue;
3614 } 3609 }
3615 case '$': { 3610 case '$': {
3616 Advance(); 3611 Advance();
(...skipping 130 matching lines...) Expand 10 before | Expand all | Expand 10 after
3747 break; 3742 break;
3748 } 3743 }
3749 default: 3744 default:
3750 // Identity escape. 3745 // Identity escape.
3751 builder.AddCharacter(Next()); 3746 builder.AddCharacter(Next());
3752 Advance(2); 3747 Advance(2);
3753 break; 3748 break;
3754 } 3749 }
3755 has_character_escapes_ = true; 3750 has_character_escapes_ = true;
3756 break; 3751 break;
3752 case '{': {
3753 int dummy;
3754 if (ParseIntervalQuantifier(&dummy, &dummy)) {
3755 ReportError(CStrVector("Nothing to repeat"), CHECK_OK);
3756 }
3757 // fallthrough
3758 }
3757 default: 3759 default:
3758 builder.AddCharacter(current()); 3760 builder.AddCharacter(current());
3759 Advance(); 3761 Advance();
3760 break; 3762 break;
3761 } // end switch(current()) 3763 } // end switch(current())
3762 3764
3763 has_read_atom: 3765 has_read_atom:
3764 int min; 3766 int min;
3765 int max; 3767 int max;
3766 switch (current()) { 3768 switch (current()) {
3767 // QuantifierPrefix :: 3769 // QuantifierPrefix ::
3768 // * 3770 // *
3769 // + 3771 // +
3770 // ? 3772 // ?
3771 // { 3773 // {
3772 case '*': 3774 case '*':
3773 min = 0; 3775 min = 0;
3774 max = RegExpQuantifier::kInfinity; 3776 max = RegExpQuantifier::kInfinity;
3775 Advance(); 3777 Advance();
3776 break; 3778 break;
3777 case '+': 3779 case '+':
3778 min = 1; 3780 min = 1;
3779 max = RegExpQuantifier::kInfinity; 3781 max = RegExpQuantifier::kInfinity;
3780 Advance(); 3782 Advance();
3781 break; 3783 break;
3782 case '?': 3784 case '?':
3783 min = 0; 3785 min = 0;
3784 max = 1; 3786 max = 1;
3785 Advance(); 3787 Advance();
3786 break; 3788 break;
3787 case '{': 3789 case '{':
3788 ParseIntervalQuantifier(&min, &max, CHECK_OK); 3790 if (ParseIntervalQuantifier(&min, &max)) {
3789 break; 3791 break;
3792 } else {
3793 continue;
3794 }
3790 default: 3795 default:
3791 continue; 3796 continue;
3792 } 3797 }
3793 bool is_greedy = true; 3798 bool is_greedy = true;
3794 if (current() == '?') { 3799 if (current() == '?') {
3795 is_greedy = false; 3800 is_greedy = false;
3796 Advance(); 3801 Advance();
3797 } 3802 }
3798 builder.AddQuantifierToAtom(min, max, is_greedy); 3803 builder.AddQuantifierToAtom(min, max, is_greedy);
3799 } 3804 }
(...skipping 69 matching lines...) Expand 10 before | Expand all | Expand 10 after
3869 } 3874 }
3870 *index_out = value; 3875 *index_out = value;
3871 return true; 3876 return true;
3872 } 3877 }
3873 3878
3874 3879
3875 // QuantifierPrefix :: 3880 // QuantifierPrefix ::
3876 // { DecimalDigits } 3881 // { DecimalDigits }
3877 // { DecimalDigits , } 3882 // { DecimalDigits , }
3878 // { DecimalDigits , DecimalDigits } 3883 // { DecimalDigits , DecimalDigits }
3879 void* RegExpParser::ParseIntervalQuantifier(int* min_out, 3884 bool RegExpParser::ParseIntervalQuantifier(int* min_out, int* max_out) {
3880 int* max_out,
3881 bool* ok) {
3882 ASSERT_EQ(current(), '{'); 3885 ASSERT_EQ(current(), '{');
3883 static const char* kInvalidQuantifier = "Invalid quantifier"; 3886 int start = position();
3884 Advance(); 3887 Advance();
3885 int min = 0; 3888 int min = 0;
3886 if (!IsDecimalDigit(current())) { 3889 if (!IsDecimalDigit(current())) {
3887 // JSC allows {} and {,} as quantifiers (and { and } and all 3890 Reset(start);
3888 // sorts of crazy stuff) but my puny human brain has been unable 3891 return false;
3889 // to figure out what they mean exactly, if anything. For now
3890 // we follow the spec and report a syntax error.
3891 ReportError(CStrVector(kInvalidQuantifier), CHECK_OK);
3892 } 3892 }
3893 while (IsDecimalDigit(current())) { 3893 while (IsDecimalDigit(current())) {
3894 min = 10 * min + (current() - '0'); 3894 min = 10 * min + (current() - '0');
3895 Advance(); 3895 Advance();
3896 } 3896 }
3897 int max = 0; 3897 int max = 0;
3898 if (current() == '}') { 3898 if (current() == '}') {
3899 max = min; 3899 max = min;
3900 Advance(); 3900 Advance();
3901 } else if (current() == ',') { 3901 } else if (current() == ',') {
3902 Advance(); 3902 Advance();
3903 if (current() == '}') { 3903 if (current() == '}') {
3904 max = RegExpQuantifier::kInfinity; 3904 max = RegExpQuantifier::kInfinity;
3905 Advance(); 3905 Advance();
3906 } else { 3906 } else {
3907 while (IsDecimalDigit(current())) { 3907 while (IsDecimalDigit(current())) {
3908 max = 10 * max + (current() - '0'); 3908 max = 10 * max + (current() - '0');
3909 Advance(); 3909 Advance();
3910 } 3910 }
3911 if (current() != '}') { 3911 if (current() != '}') {
3912 ReportError(CStrVector(kInvalidQuantifier), CHECK_OK); 3912 Reset(start);
3913 return false;
3913 } 3914 }
3914 Advance(); 3915 Advance();
3915 } 3916 }
3916 } else { 3917 } else {
3917 ReportError(CStrVector(kInvalidQuantifier), CHECK_OK); 3918 Reset(start);
3919 return false;
3918 } 3920 }
3919 *min_out = min; 3921 *min_out = min;
3920 *max_out = max; 3922 *max_out = max;
3921 return NULL; 3923 return true;
3922 } 3924 }
3923 3925
3924 3926
3925 // Upper and lower case letters differ by one bit. 3927 // Upper and lower case letters differ by one bit.
3926 STATIC_CHECK(('a' ^ 'A') == 0x20); 3928 STATIC_CHECK(('a' ^ 'A') == 0x20);
3927 3929
3928 uc32 RegExpParser::ParseControlLetterEscape(bool* ok) { 3930 uc32 RegExpParser::ParseControlLetterEscape(bool* ok) {
3929 if (!has_more()) { 3931 if (!has_more()) {
3930 ReportError(CStrVector("\\c at end of pattern"), ok); 3932 ReportError(CStrVector("\\c at end of pattern"), ok);
3931 return '\0'; 3933 return '\0';
(...skipping 213 matching lines...) Expand 10 before | Expand all | Expand 10 after
4145 is_negated = true; 4147 is_negated = true;
4146 Advance(); 4148 Advance();
4147 } 4149 }
4148 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2); 4150 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2);
4149 while (has_more() && current() != ']') { 4151 while (has_more() && current() != ']') {
4150 bool is_char_class = false; 4152 bool is_char_class = false;
4151 CharacterRange first = ParseClassAtom(&is_char_class, ranges, CHECK_OK); 4153 CharacterRange first = ParseClassAtom(&is_char_class, ranges, CHECK_OK);
4152 if (!is_char_class) { 4154 if (!is_char_class) {
4153 if (current() == '-') { 4155 if (current() == '-') {
4154 Advance(); 4156 Advance();
4155 if (current() == ']') { 4157 if (current() == kEndMarker) {
4158 // If we reach the end we break out of the loop and let the
4159 // following code report an error.
4160 break;
4161 } else if (current() == ']') {
4156 ranges->Add(first); 4162 ranges->Add(first);
4157 ranges->Add(CharacterRange::Singleton('-')); 4163 ranges->Add(CharacterRange::Singleton('-'));
4158 break; 4164 break;
4159 } 4165 }
4160 CharacterRange next = 4166 CharacterRange next =
4161 ParseClassAtom(&is_char_class, ranges, CHECK_OK); 4167 ParseClassAtom(&is_char_class, ranges, CHECK_OK);
4162 if (is_char_class) { 4168 if (is_char_class) {
4163 return ReportError(CStrVector(kIllegal), CHECK_OK); 4169 return ReportError(CStrVector(kIllegal), CHECK_OK);
4164 } 4170 }
4165 if (first.from() > next.to()) { 4171 if (first.from() > next.to()) {
(...skipping 131 matching lines...) Expand 10 before | Expand all | Expand 10 after
4297 start_position, 4303 start_position,
4298 is_expression); 4304 is_expression);
4299 return result; 4305 return result;
4300 } 4306 }
4301 4307
4302 4308
4303 #undef NEW 4309 #undef NEW
4304 4310
4305 4311
4306 } } // namespace v8::internal 4312 } } // namespace v8::internal
OLDNEW
« no previous file with comments | « src/jsregexp.cc ('k') | test/cctest/test-regexp.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698