| OLD | NEW |
| 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 302 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 313 // following quantifier | 313 // following quantifier |
| 314 void AddEmpty(); | 314 void AddEmpty(); |
| 315 void AddAtom(RegExpTree* tree); | 315 void AddAtom(RegExpTree* tree); |
| 316 void AddAssertion(RegExpTree* tree); | 316 void AddAssertion(RegExpTree* tree); |
| 317 void NewAlternative(); // '|' | 317 void NewAlternative(); // '|' |
| 318 void AddQuantifierToAtom(int min, int max, bool is_greedy); | 318 void AddQuantifierToAtom(int min, int max, bool is_greedy); |
| 319 RegExpTree* ToRegExp(); | 319 RegExpTree* ToRegExp(); |
| 320 private: | 320 private: |
| 321 void FlushCharacters(); | 321 void FlushCharacters(); |
| 322 void FlushText(); | 322 void FlushText(); |
| 323 bool FlushTerms(); | 323 void FlushTerms(); |
| 324 bool pending_empty_; | 324 bool pending_empty_; |
| 325 ZoneList<uc16>* characters_; | 325 ZoneList<uc16>* characters_; |
| 326 BufferedZoneList<RegExpTree, 2> terms_; | 326 BufferedZoneList<RegExpTree, 2> terms_; |
| 327 BufferedZoneList<RegExpTree, 2> text_; | 327 BufferedZoneList<RegExpTree, 2> text_; |
| 328 BufferedZoneList<RegExpTree, 2> alternatives_; | 328 BufferedZoneList<RegExpTree, 2> alternatives_; |
| 329 #ifdef DEBUG | 329 #ifdef DEBUG |
| 330 enum {ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM} last_added_; | 330 enum {ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM} last_added_; |
| 331 #define LAST(x) last_added_ = x; | 331 #define LAST(x) last_added_ = x; |
| 332 #else | 332 #else |
| 333 #define LAST(x) | 333 #define LAST(x) |
| (...skipping 69 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 403 | 403 |
| 404 | 404 |
| 405 void RegExpBuilder::AddAssertion(RegExpTree* assert) { | 405 void RegExpBuilder::AddAssertion(RegExpTree* assert) { |
| 406 FlushText(); | 406 FlushText(); |
| 407 terms_.Add(assert); | 407 terms_.Add(assert); |
| 408 LAST(ADD_ASSERT); | 408 LAST(ADD_ASSERT); |
| 409 } | 409 } |
| 410 | 410 |
| 411 | 411 |
| 412 void RegExpBuilder::NewAlternative() { | 412 void RegExpBuilder::NewAlternative() { |
| 413 if (!FlushTerms()) { | 413 FlushTerms(); |
| 414 alternatives_.Add(RegExpEmpty::GetInstance()); | |
| 415 } | |
| 416 } | 414 } |
| 417 | 415 |
| 418 | 416 |
| 419 bool RegExpBuilder::FlushTerms() { | 417 void RegExpBuilder::FlushTerms() { |
| 420 FlushText(); | 418 FlushText(); |
| 421 int num_terms = terms_.length(); | 419 int num_terms = terms_.length(); |
| 420 RegExpTree* alternative; |
| 422 if (num_terms == 0) { | 421 if (num_terms == 0) { |
| 423 return false; | 422 alternative = RegExpEmpty::GetInstance(); |
| 424 } | 423 } else if (num_terms == 1) { |
| 425 RegExpTree* alternative; | |
| 426 if (num_terms == 1) { | |
| 427 alternative = terms_.last(); | 424 alternative = terms_.last(); |
| 428 } else { | 425 } else { |
| 429 alternative = new RegExpAlternative(terms_.GetList()); | 426 alternative = new RegExpAlternative(terms_.GetList()); |
| 430 } | 427 } |
| 431 alternatives_.Add(alternative); | 428 alternatives_.Add(alternative); |
| 432 terms_.Clear(); | 429 terms_.Clear(); |
| 433 LAST(ADD_NONE); | 430 LAST(ADD_NONE); |
| 434 return true; | |
| 435 } | 431 } |
| 436 | 432 |
| 437 | 433 |
| 438 RegExpTree* RegExpBuilder::ToRegExp() { | 434 RegExpTree* RegExpBuilder::ToRegExp() { |
| 439 FlushTerms(); | 435 FlushTerms(); |
| 440 int num_alternatives = alternatives_.length(); | 436 int num_alternatives = alternatives_.length(); |
| 441 if (num_alternatives == 0) { | 437 if (num_alternatives == 0) { |
| 442 return RegExpEmpty::GetInstance(); | 438 return RegExpEmpty::GetInstance(); |
| 443 } | 439 } |
| 444 if (num_alternatives == 1) { | 440 if (num_alternatives == 1) { |
| (...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 499 RegExpParser(FlatStringReader* in, | 495 RegExpParser(FlatStringReader* in, |
| 500 Handle<String>* error, | 496 Handle<String>* error, |
| 501 bool multiline_mode); | 497 bool multiline_mode); |
| 502 RegExpTree* ParsePattern(bool* ok); | 498 RegExpTree* ParsePattern(bool* ok); |
| 503 RegExpTree* ParseDisjunction(bool* ok); | 499 RegExpTree* ParseDisjunction(bool* ok); |
| 504 RegExpTree* ParseGroup(bool* ok); | 500 RegExpTree* ParseGroup(bool* ok); |
| 505 RegExpTree* ParseCharacterClass(bool* ok); | 501 RegExpTree* ParseCharacterClass(bool* ok); |
| 506 | 502 |
| 507 // Parses a {...,...} quantifier and stores the range in the given | 503 // Parses a {...,...} quantifier and stores the range in the given |
| 508 // out parameters. | 504 // out parameters. |
| 509 void* ParseIntervalQuantifier(int* min_out, int* max_out, bool* ok); | 505 bool ParseIntervalQuantifier(int* min_out, int* max_out); |
| 510 | 506 |
| 511 // Parses and returns a single escaped character. The character | 507 // Parses and returns a single escaped character. The character |
| 512 // must not be 'b' or 'B' since they are usually handle specially. | 508 // must not be 'b' or 'B' since they are usually handle specially. |
| 513 uc32 ParseClassCharacterEscape(bool* ok); | 509 uc32 ParseClassCharacterEscape(bool* ok); |
| 514 | 510 |
| 515 // Checks whether the following is a length-digit hexadecimal number, | 511 // Checks whether the following is a length-digit hexadecimal number, |
| 516 // and sets the value if it is. | 512 // and sets the value if it is. |
| 517 bool ParseHexEscape(int length, uc32* value); | 513 bool ParseHexEscape(int length, uc32* value); |
| 518 | 514 |
| 519 uc32 ParseControlLetterEscape(bool* ok); | 515 uc32 ParseControlLetterEscape(bool* ok); |
| (...skipping 11 matching lines...) Expand all Loading... |
| 531 RegExpTree* ReportError(Vector<const char> message, bool* ok); | 527 RegExpTree* ReportError(Vector<const char> message, bool* ok); |
| 532 void Advance(); | 528 void Advance(); |
| 533 void Advance(int dist); | 529 void Advance(int dist); |
| 534 void Reset(int pos); | 530 void Reset(int pos); |
| 535 | 531 |
| 536 bool HasCharacterEscapes(); | 532 bool HasCharacterEscapes(); |
| 537 | 533 |
| 538 int captures_started() { return captures_ == NULL ? 0 : captures_->length(); } | 534 int captures_started() { return captures_ == NULL ? 0 : captures_->length(); } |
| 539 int position() { return next_pos_ - 1; } | 535 int position() { return next_pos_ - 1; } |
| 540 | 536 |
| 541 static const uc32 kEndMarker = unibrow::Utf8::kBadChar; | 537 static const uc32 kEndMarker = (1 << 21); |
| 542 private: | 538 private: |
| 543 uc32 current() { return current_; } | 539 uc32 current() { return current_; } |
| 544 bool has_more() { return has_more_; } | 540 bool has_more() { return has_more_; } |
| 545 bool has_next() { return next_pos_ < in()->length(); } | 541 bool has_next() { return next_pos_ < in()->length(); } |
| 546 uc32 Next(); | 542 uc32 Next(); |
| 547 FlatStringReader* in() { return in_; } | 543 FlatStringReader* in() { return in_; } |
| 548 uc32 current_; | 544 uc32 current_; |
| 549 bool has_more_; | 545 bool has_more_; |
| 550 bool multiline_mode_; | 546 bool multiline_mode_; |
| 551 int next_pos_; | 547 int next_pos_; |
| (...skipping 3043 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3595 if (capture->available() == CAPTURE_AVAILABLE) { | 3591 if (capture->available() == CAPTURE_AVAILABLE) { |
| 3596 capture->set_available(CAPTURE_UNREACHABLE); | 3592 capture->set_available(CAPTURE_UNREACHABLE); |
| 3597 } | 3593 } |
| 3598 } | 3594 } |
| 3599 capture_start_index = capture_new_alt_start_index; | 3595 capture_start_index = capture_new_alt_start_index; |
| 3600 continue; | 3596 continue; |
| 3601 } | 3597 } |
| 3602 case '*': | 3598 case '*': |
| 3603 case '+': | 3599 case '+': |
| 3604 case '?': | 3600 case '?': |
| 3605 case '{': | 3601 ReportError(CStrVector("Nothing to repeat"), CHECK_OK); |
| 3606 ReportError(CStrVector("Nothing to repeat."), CHECK_OK); | |
| 3607 case '^': { | 3602 case '^': { |
| 3608 Advance(); | 3603 Advance(); |
| 3609 RegExpAssertion::Type type = | 3604 RegExpAssertion::Type type = |
| 3610 multiline_mode_ ? RegExpAssertion::START_OF_LINE : | 3605 multiline_mode_ ? RegExpAssertion::START_OF_LINE : |
| 3611 RegExpAssertion::START_OF_INPUT; | 3606 RegExpAssertion::START_OF_INPUT; |
| 3612 builder.AddAssertion(new RegExpAssertion(type)); | 3607 builder.AddAssertion(new RegExpAssertion(type)); |
| 3613 continue; | 3608 continue; |
| 3614 } | 3609 } |
| 3615 case '$': { | 3610 case '$': { |
| 3616 Advance(); | 3611 Advance(); |
| (...skipping 130 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3747 break; | 3742 break; |
| 3748 } | 3743 } |
| 3749 default: | 3744 default: |
| 3750 // Identity escape. | 3745 // Identity escape. |
| 3751 builder.AddCharacter(Next()); | 3746 builder.AddCharacter(Next()); |
| 3752 Advance(2); | 3747 Advance(2); |
| 3753 break; | 3748 break; |
| 3754 } | 3749 } |
| 3755 has_character_escapes_ = true; | 3750 has_character_escapes_ = true; |
| 3756 break; | 3751 break; |
| 3752 case '{': { |
| 3753 int dummy; |
| 3754 if (ParseIntervalQuantifier(&dummy, &dummy)) { |
| 3755 ReportError(CStrVector("Nothing to repeat"), CHECK_OK); |
| 3756 } |
| 3757 // fallthrough |
| 3758 } |
| 3757 default: | 3759 default: |
| 3758 builder.AddCharacter(current()); | 3760 builder.AddCharacter(current()); |
| 3759 Advance(); | 3761 Advance(); |
| 3760 break; | 3762 break; |
| 3761 } // end switch(current()) | 3763 } // end switch(current()) |
| 3762 | 3764 |
| 3763 has_read_atom: | 3765 has_read_atom: |
| 3764 int min; | 3766 int min; |
| 3765 int max; | 3767 int max; |
| 3766 switch (current()) { | 3768 switch (current()) { |
| 3767 // QuantifierPrefix :: | 3769 // QuantifierPrefix :: |
| 3768 // * | 3770 // * |
| 3769 // + | 3771 // + |
| 3770 // ? | 3772 // ? |
| 3771 // { | 3773 // { |
| 3772 case '*': | 3774 case '*': |
| 3773 min = 0; | 3775 min = 0; |
| 3774 max = RegExpQuantifier::kInfinity; | 3776 max = RegExpQuantifier::kInfinity; |
| 3775 Advance(); | 3777 Advance(); |
| 3776 break; | 3778 break; |
| 3777 case '+': | 3779 case '+': |
| 3778 min = 1; | 3780 min = 1; |
| 3779 max = RegExpQuantifier::kInfinity; | 3781 max = RegExpQuantifier::kInfinity; |
| 3780 Advance(); | 3782 Advance(); |
| 3781 break; | 3783 break; |
| 3782 case '?': | 3784 case '?': |
| 3783 min = 0; | 3785 min = 0; |
| 3784 max = 1; | 3786 max = 1; |
| 3785 Advance(); | 3787 Advance(); |
| 3786 break; | 3788 break; |
| 3787 case '{': | 3789 case '{': |
| 3788 ParseIntervalQuantifier(&min, &max, CHECK_OK); | 3790 if (ParseIntervalQuantifier(&min, &max)) { |
| 3789 break; | 3791 break; |
| 3792 } else { |
| 3793 continue; |
| 3794 } |
| 3790 default: | 3795 default: |
| 3791 continue; | 3796 continue; |
| 3792 } | 3797 } |
| 3793 bool is_greedy = true; | 3798 bool is_greedy = true; |
| 3794 if (current() == '?') { | 3799 if (current() == '?') { |
| 3795 is_greedy = false; | 3800 is_greedy = false; |
| 3796 Advance(); | 3801 Advance(); |
| 3797 } | 3802 } |
| 3798 builder.AddQuantifierToAtom(min, max, is_greedy); | 3803 builder.AddQuantifierToAtom(min, max, is_greedy); |
| 3799 } | 3804 } |
| (...skipping 69 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3869 } | 3874 } |
| 3870 *index_out = value; | 3875 *index_out = value; |
| 3871 return true; | 3876 return true; |
| 3872 } | 3877 } |
| 3873 | 3878 |
| 3874 | 3879 |
| 3875 // QuantifierPrefix :: | 3880 // QuantifierPrefix :: |
| 3876 // { DecimalDigits } | 3881 // { DecimalDigits } |
| 3877 // { DecimalDigits , } | 3882 // { DecimalDigits , } |
| 3878 // { DecimalDigits , DecimalDigits } | 3883 // { DecimalDigits , DecimalDigits } |
| 3879 void* RegExpParser::ParseIntervalQuantifier(int* min_out, | 3884 bool RegExpParser::ParseIntervalQuantifier(int* min_out, int* max_out) { |
| 3880 int* max_out, | |
| 3881 bool* ok) { | |
| 3882 ASSERT_EQ(current(), '{'); | 3885 ASSERT_EQ(current(), '{'); |
| 3883 static const char* kInvalidQuantifier = "Invalid quantifier"; | 3886 int start = position(); |
| 3884 Advance(); | 3887 Advance(); |
| 3885 int min = 0; | 3888 int min = 0; |
| 3886 if (!IsDecimalDigit(current())) { | 3889 if (!IsDecimalDigit(current())) { |
| 3887 // JSC allows {} and {,} as quantifiers (and { and } and all | 3890 Reset(start); |
| 3888 // sorts of crazy stuff) but my puny human brain has been unable | 3891 return false; |
| 3889 // to figure out what they mean exactly, if anything. For now | |
| 3890 // we follow the spec and report a syntax error. | |
| 3891 ReportError(CStrVector(kInvalidQuantifier), CHECK_OK); | |
| 3892 } | 3892 } |
| 3893 while (IsDecimalDigit(current())) { | 3893 while (IsDecimalDigit(current())) { |
| 3894 min = 10 * min + (current() - '0'); | 3894 min = 10 * min + (current() - '0'); |
| 3895 Advance(); | 3895 Advance(); |
| 3896 } | 3896 } |
| 3897 int max = 0; | 3897 int max = 0; |
| 3898 if (current() == '}') { | 3898 if (current() == '}') { |
| 3899 max = min; | 3899 max = min; |
| 3900 Advance(); | 3900 Advance(); |
| 3901 } else if (current() == ',') { | 3901 } else if (current() == ',') { |
| 3902 Advance(); | 3902 Advance(); |
| 3903 if (current() == '}') { | 3903 if (current() == '}') { |
| 3904 max = RegExpQuantifier::kInfinity; | 3904 max = RegExpQuantifier::kInfinity; |
| 3905 Advance(); | 3905 Advance(); |
| 3906 } else { | 3906 } else { |
| 3907 while (IsDecimalDigit(current())) { | 3907 while (IsDecimalDigit(current())) { |
| 3908 max = 10 * max + (current() - '0'); | 3908 max = 10 * max + (current() - '0'); |
| 3909 Advance(); | 3909 Advance(); |
| 3910 } | 3910 } |
| 3911 if (current() != '}') { | 3911 if (current() != '}') { |
| 3912 ReportError(CStrVector(kInvalidQuantifier), CHECK_OK); | 3912 Reset(start); |
| 3913 return false; |
| 3913 } | 3914 } |
| 3914 Advance(); | 3915 Advance(); |
| 3915 } | 3916 } |
| 3916 } else { | 3917 } else { |
| 3917 ReportError(CStrVector(kInvalidQuantifier), CHECK_OK); | 3918 Reset(start); |
| 3919 return false; |
| 3918 } | 3920 } |
| 3919 *min_out = min; | 3921 *min_out = min; |
| 3920 *max_out = max; | 3922 *max_out = max; |
| 3921 return NULL; | 3923 return true; |
| 3922 } | 3924 } |
| 3923 | 3925 |
| 3924 | 3926 |
| 3925 // Upper and lower case letters differ by one bit. | 3927 // Upper and lower case letters differ by one bit. |
| 3926 STATIC_CHECK(('a' ^ 'A') == 0x20); | 3928 STATIC_CHECK(('a' ^ 'A') == 0x20); |
| 3927 | 3929 |
| 3928 uc32 RegExpParser::ParseControlLetterEscape(bool* ok) { | 3930 uc32 RegExpParser::ParseControlLetterEscape(bool* ok) { |
| 3929 if (!has_more()) { | 3931 if (!has_more()) { |
| 3930 ReportError(CStrVector("\\c at end of pattern"), ok); | 3932 ReportError(CStrVector("\\c at end of pattern"), ok); |
| 3931 return '\0'; | 3933 return '\0'; |
| (...skipping 213 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4145 is_negated = true; | 4147 is_negated = true; |
| 4146 Advance(); | 4148 Advance(); |
| 4147 } | 4149 } |
| 4148 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2); | 4150 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2); |
| 4149 while (has_more() && current() != ']') { | 4151 while (has_more() && current() != ']') { |
| 4150 bool is_char_class = false; | 4152 bool is_char_class = false; |
| 4151 CharacterRange first = ParseClassAtom(&is_char_class, ranges, CHECK_OK); | 4153 CharacterRange first = ParseClassAtom(&is_char_class, ranges, CHECK_OK); |
| 4152 if (!is_char_class) { | 4154 if (!is_char_class) { |
| 4153 if (current() == '-') { | 4155 if (current() == '-') { |
| 4154 Advance(); | 4156 Advance(); |
| 4155 if (current() == ']') { | 4157 if (current() == kEndMarker) { |
| 4158 // If we reach the end we break out of the loop and let the |
| 4159 // following code report an error. |
| 4160 break; |
| 4161 } else if (current() == ']') { |
| 4156 ranges->Add(first); | 4162 ranges->Add(first); |
| 4157 ranges->Add(CharacterRange::Singleton('-')); | 4163 ranges->Add(CharacterRange::Singleton('-')); |
| 4158 break; | 4164 break; |
| 4159 } | 4165 } |
| 4160 CharacterRange next = | 4166 CharacterRange next = |
| 4161 ParseClassAtom(&is_char_class, ranges, CHECK_OK); | 4167 ParseClassAtom(&is_char_class, ranges, CHECK_OK); |
| 4162 if (is_char_class) { | 4168 if (is_char_class) { |
| 4163 return ReportError(CStrVector(kIllegal), CHECK_OK); | 4169 return ReportError(CStrVector(kIllegal), CHECK_OK); |
| 4164 } | 4170 } |
| 4165 if (first.from() > next.to()) { | 4171 if (first.from() > next.to()) { |
| (...skipping 131 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4297 start_position, | 4303 start_position, |
| 4298 is_expression); | 4304 is_expression); |
| 4299 return result; | 4305 return result; |
| 4300 } | 4306 } |
| 4301 | 4307 |
| 4302 | 4308 |
| 4303 #undef NEW | 4309 #undef NEW |
| 4304 | 4310 |
| 4305 | 4311 |
| 4306 } } // namespace v8::internal | 4312 } } // namespace v8::internal |
| OLD | NEW |