| OLD | NEW |
| 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 478 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 489 UNREACHABLE(); | 489 UNREACHABLE(); |
| 490 return; | 490 return; |
| 491 } | 491 } |
| 492 terms_.Add(new RegExpQuantifier(min, max, is_greedy, atom)); | 492 terms_.Add(new RegExpQuantifier(min, max, is_greedy, atom)); |
| 493 LAST(ADD_TERM); | 493 LAST(ADD_TERM); |
| 494 } | 494 } |
| 495 | 495 |
| 496 | 496 |
| 497 class RegExpParser { | 497 class RegExpParser { |
| 498 public: | 498 public: |
| 499 RegExpParser(unibrow::CharacterStream* in, | 499 RegExpParser(FlatStringReader* in, |
| 500 Handle<String>* error, | 500 Handle<String>* error, |
| 501 bool multiline_mode); | 501 bool multiline_mode); |
| 502 RegExpTree* ParsePattern(bool* ok); | 502 RegExpTree* ParsePattern(bool* ok); |
| 503 RegExpTree* ParseDisjunction(bool* ok); | 503 RegExpTree* ParseDisjunction(bool* ok); |
| 504 RegExpTree* ParseGroup(bool* ok); | 504 RegExpTree* ParseGroup(bool* ok); |
| 505 RegExpTree* ParseCharacterClass(bool* ok); | 505 RegExpTree* ParseCharacterClass(bool* ok); |
| 506 | 506 |
| 507 // Parses a {...,...} quantifier and stores the range in the given | 507 // Parses a {...,...} quantifier and stores the range in the given |
| 508 // out parameters. | 508 // out parameters. |
| 509 void* ParseIntervalQuantifier(int* min_out, int* max_out, bool* ok); | 509 void* ParseIntervalQuantifier(int* min_out, int* max_out, bool* ok); |
| (...skipping 14 matching lines...) Expand all Loading... |
| 524 // it fails it will push back the characters read so the same characters | 524 // it fails it will push back the characters read so the same characters |
| 525 // can be reparsed. | 525 // can be reparsed. |
| 526 bool ParseBackreferenceIndex(int* index_out); | 526 bool ParseBackreferenceIndex(int* index_out); |
| 527 | 527 |
| 528 CharacterRange ParseClassAtom(bool* is_char_class, | 528 CharacterRange ParseClassAtom(bool* is_char_class, |
| 529 ZoneList<CharacterRange>* ranges, | 529 ZoneList<CharacterRange>* ranges, |
| 530 bool* ok); | 530 bool* ok); |
| 531 RegExpTree* ReportError(Vector<const char> message, bool* ok); | 531 RegExpTree* ReportError(Vector<const char> message, bool* ok); |
| 532 void Advance(); | 532 void Advance(); |
| 533 void Advance(int dist); | 533 void Advance(int dist); |
| 534 // Pushes a read character (or potentially some other character) back | 534 void Reset(int pos); |
| 535 // on the input stream. After pushing it back, it becomes the character | |
| 536 // returned by current(). There is a limited amount of push-back buffer. | |
| 537 // A function using PushBack should check that it doesn't push back more | |
| 538 // than kMaxPushback characters, and it should not push back more characters | |
| 539 // than it has read. | |
| 540 void PushBack(uc32 character); | |
| 541 bool CanPushBack(); | |
| 542 | 535 |
| 543 bool HasCharacterEscapes(); | 536 bool HasCharacterEscapes(); |
| 544 | 537 |
| 545 int captures_started() { return captures_ == NULL ? 0 : captures_->length(); } | 538 int captures_started() { return captures_ == NULL ? 0 : captures_->length(); } |
| 539 int position() { return next_pos_ - 1; } |
| 546 | 540 |
| 547 static const uc32 kEndMarker = unibrow::Utf8::kBadChar; | 541 static const uc32 kEndMarker = unibrow::Utf8::kBadChar; |
| 548 private: | 542 private: |
| 549 uc32 current() { return current_; } | 543 uc32 current() { return current_; } |
| 550 uc32 next() { return next_; } | |
| 551 bool has_more() { return has_more_; } | 544 bool has_more() { return has_more_; } |
| 552 bool has_next() { return has_next_; } | 545 bool has_next() { return next_pos_ < in()->length(); } |
| 553 unibrow::CharacterStream* in() { return in_; } | 546 uc32 Next(); |
| 547 FlatStringReader* in() { return in_; } |
| 554 uc32 current_; | 548 uc32 current_; |
| 555 uc32 next_; | |
| 556 bool has_more_; | 549 bool has_more_; |
| 557 bool has_next_; | |
| 558 bool multiline_mode_; | 550 bool multiline_mode_; |
| 559 unibrow::CharacterStream* in_; | 551 int next_pos_; |
| 552 FlatStringReader* in_; |
| 560 Handle<String>* error_; | 553 Handle<String>* error_; |
| 561 static const int kMaxPushback = 5; | |
| 562 int pushback_count_; | |
| 563 uc32 pushback_buffer_[kMaxPushback]; | |
| 564 bool has_character_escapes_; | 554 bool has_character_escapes_; |
| 565 ZoneList<RegExpCapture*>* captures_; | 555 ZoneList<RegExpCapture*>* captures_; |
| 566 }; | 556 }; |
| 567 | 557 |
| 568 | 558 |
| 569 // A temporary scope stores information during parsing, just like | 559 // A temporary scope stores information during parsing, just like |
| 570 // a plain scope. However, temporary scopes are not kept around | 560 // a plain scope. However, temporary scopes are not kept around |
| 571 // after parsing or referenced by syntax trees so they can be stack- | 561 // after parsing or referenced by syntax trees so they can be stack- |
| 572 // allocated and hence used by the pre-parser. | 562 // allocated and hence used by the pre-parser. |
| 573 class TemporaryScope BASE_EMBEDDED { | 563 class TemporaryScope BASE_EMBEDDED { |
| (...skipping 2925 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3499 args->Add(new Literal(array)); | 3489 args->Add(new Literal(array)); |
| 3500 return new Throw(new CallRuntime(constructor, NULL, args), | 3490 return new Throw(new CallRuntime(constructor, NULL, args), |
| 3501 scanner().location().beg_pos); | 3491 scanner().location().beg_pos); |
| 3502 } | 3492 } |
| 3503 | 3493 |
| 3504 | 3494 |
| 3505 // ---------------------------------------------------------------------------- | 3495 // ---------------------------------------------------------------------------- |
| 3506 // Regular expressions | 3496 // Regular expressions |
| 3507 | 3497 |
| 3508 | 3498 |
| 3509 RegExpParser::RegExpParser(unibrow::CharacterStream* in, | 3499 RegExpParser::RegExpParser(FlatStringReader* in, |
| 3510 Handle<String>* error, | 3500 Handle<String>* error, |
| 3511 bool multiline_mode) | 3501 bool multiline_mode) |
| 3512 : current_(kEndMarker), | 3502 : current_(kEndMarker), |
| 3513 next_(kEndMarker), | |
| 3514 has_more_(true), | 3503 has_more_(true), |
| 3515 has_next_(true), | |
| 3516 multiline_mode_(multiline_mode), | 3504 multiline_mode_(multiline_mode), |
| 3505 next_pos_(0), |
| 3517 in_(in), | 3506 in_(in), |
| 3518 error_(error), | 3507 error_(error), |
| 3519 pushback_count_(0), | |
| 3520 has_character_escapes_(false), | 3508 has_character_escapes_(false), |
| 3521 captures_(NULL) { | 3509 captures_(NULL) { |
| 3522 Advance(2); | 3510 Advance(1); |
| 3511 } |
| 3512 |
| 3513 |
| 3514 uc32 RegExpParser::Next() { |
| 3515 if (has_next()) { |
| 3516 return in()->Get(next_pos_); |
| 3517 } else { |
| 3518 return kEndMarker; |
| 3519 } |
| 3523 } | 3520 } |
| 3524 | 3521 |
| 3525 | 3522 |
| 3526 void RegExpParser::Advance() { | 3523 void RegExpParser::Advance() { |
| 3527 current_ = next_; | 3524 if (next_pos_ < in()->length()) { |
| 3528 has_more_ = has_next_; | 3525 current_ = in()->Get(next_pos_); |
| 3529 if (pushback_count_ > 0) { | 3526 next_pos_++; |
| 3530 pushback_count_--; | |
| 3531 next_ = pushback_buffer_[pushback_count_]; | |
| 3532 } else if (in()->has_more()) { | |
| 3533 next_ = in()->GetNext(); | |
| 3534 } else { | 3527 } else { |
| 3535 next_ = kEndMarker; | 3528 current_ = kEndMarker; |
| 3536 has_next_ = false; | 3529 has_more_ = false; |
| 3537 } | 3530 } |
| 3538 } | 3531 } |
| 3539 | 3532 |
| 3540 | 3533 |
| 3534 void RegExpParser::Reset(int pos) { |
| 3535 next_pos_ = pos; |
| 3536 Advance(); |
| 3537 } |
| 3538 |
| 3539 |
| 3541 void RegExpParser::Advance(int dist) { | 3540 void RegExpParser::Advance(int dist) { |
| 3542 for (int i = 0; i < dist; i++) | 3541 for (int i = 0; i < dist; i++) |
| 3543 Advance(); | 3542 Advance(); |
| 3544 } | 3543 } |
| 3545 | 3544 |
| 3546 | 3545 |
| 3547 void RegExpParser::PushBack(uc32 character) { | |
| 3548 if (has_next_) { | |
| 3549 ASSERT(pushback_count_ < kMaxPushback); | |
| 3550 pushback_buffer_[pushback_count_] = next_; | |
| 3551 pushback_count_++; | |
| 3552 } | |
| 3553 | |
| 3554 next_ = current_; | |
| 3555 has_next_ = has_more_; | |
| 3556 | |
| 3557 current_ = character; | |
| 3558 has_more_ = true; | |
| 3559 } | |
| 3560 | |
| 3561 | |
| 3562 bool RegExpParser::CanPushBack() { | |
| 3563 return (pushback_count_ < kMaxPushback); | |
| 3564 } | |
| 3565 | |
| 3566 // Reports whether the parsed string atoms contain any characters that were | 3546 // Reports whether the parsed string atoms contain any characters that were |
| 3567 // escaped in the original pattern. If not, all atoms are proper substrings | 3547 // escaped in the original pattern. If not, all atoms are proper substrings |
| 3568 // of the original pattern. | 3548 // of the original pattern. |
| 3569 bool RegExpParser::HasCharacterEscapes() { | 3549 bool RegExpParser::HasCharacterEscapes() { |
| 3570 return has_character_escapes_; | 3550 return has_character_escapes_; |
| 3571 } | 3551 } |
| 3572 | 3552 |
| 3573 RegExpTree* RegExpParser::ReportError(Vector<const char> message, bool* ok) { | 3553 RegExpTree* RegExpParser::ReportError(Vector<const char> message, bool* ok) { |
| 3574 *ok = false; | 3554 *ok = false; |
| 3575 *error_ = Factory::NewStringFromAscii(message, NOT_TENURED); | 3555 *error_ = Factory::NewStringFromAscii(message, NOT_TENURED); |
| (...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3655 break; | 3635 break; |
| 3656 } | 3636 } |
| 3657 case '[': { | 3637 case '[': { |
| 3658 RegExpTree* atom = ParseCharacterClass(CHECK_OK); | 3638 RegExpTree* atom = ParseCharacterClass(CHECK_OK); |
| 3659 builder.AddAtom(atom); | 3639 builder.AddAtom(atom); |
| 3660 break; | 3640 break; |
| 3661 } | 3641 } |
| 3662 // Atom :: | 3642 // Atom :: |
| 3663 // \ AtomEscape | 3643 // \ AtomEscape |
| 3664 case '\\': | 3644 case '\\': |
| 3665 switch (next()) { | 3645 switch (Next()) { |
| 3666 case kEndMarker: | 3646 case kEndMarker: |
| 3667 ReportError(CStrVector("\\ at end of pattern"), CHECK_OK); | 3647 ReportError(CStrVector("\\ at end of pattern"), CHECK_OK); |
| 3668 case 'b': | 3648 case 'b': |
| 3669 Advance(2); | 3649 Advance(2); |
| 3670 builder.AddAssertion( | 3650 builder.AddAssertion( |
| 3671 new RegExpAssertion(RegExpAssertion::BOUNDARY)); | 3651 new RegExpAssertion(RegExpAssertion::BOUNDARY)); |
| 3672 continue; | 3652 continue; |
| 3673 case 'B': | 3653 case 'B': |
| 3674 Advance(2); | 3654 Advance(2); |
| 3675 builder.AddAssertion( | 3655 builder.AddAssertion( |
| 3676 new RegExpAssertion(RegExpAssertion::NON_BOUNDARY)); | 3656 new RegExpAssertion(RegExpAssertion::NON_BOUNDARY)); |
| 3677 continue; | 3657 continue; |
| 3678 // AtomEscape :: | 3658 // AtomEscape :: |
| 3679 // CharacterClassEscape | 3659 // CharacterClassEscape |
| 3680 // | 3660 // |
| 3681 // CharacterClassEscape :: one of | 3661 // CharacterClassEscape :: one of |
| 3682 // d D s S w W | 3662 // d D s S w W |
| 3683 case 'd': case 'D': case 's': case 'S': case 'w': case 'W': { | 3663 case 'd': case 'D': case 's': case 'S': case 'w': case 'W': { |
| 3684 uc32 c = next(); | 3664 uc32 c = Next(); |
| 3685 Advance(2); | 3665 Advance(2); |
| 3686 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2); | 3666 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2); |
| 3687 CharacterRange::AddClassEscape(c, ranges); | 3667 CharacterRange::AddClassEscape(c, ranges); |
| 3688 RegExpTree* atom = new RegExpCharacterClass(ranges, false); | 3668 RegExpTree* atom = new RegExpCharacterClass(ranges, false); |
| 3689 builder.AddAtom(atom); | 3669 builder.AddAtom(atom); |
| 3690 goto has_read_atom; // Avoid setting has_character_escapes_. | 3670 goto has_read_atom; // Avoid setting has_character_escapes_. |
| 3691 } | 3671 } |
| 3692 case '1': case '2': case '3': case '4': case '5': case '6': | 3672 case '1': case '2': case '3': case '4': case '5': case '6': |
| 3693 case '7': case '8': case '9': { | 3673 case '7': case '8': case '9': { |
| 3694 int index = 0; | 3674 int index = 0; |
| 3695 if (ParseBackreferenceIndex(&index)) { | 3675 if (ParseBackreferenceIndex(&index)) { |
| 3696 RegExpCapture* capture = captures_->at(index - 1); | 3676 RegExpCapture* capture = captures_->at(index - 1); |
| 3697 if (capture == NULL || capture->available() != CAPTURE_AVAILABLE) { | 3677 if (capture == NULL || capture->available() != CAPTURE_AVAILABLE) { |
| 3698 // Prepare to ignore a following quantifier | 3678 // Prepare to ignore a following quantifier |
| 3699 builder.AddEmpty(); | 3679 builder.AddEmpty(); |
| 3700 goto has_read_atom; | 3680 goto has_read_atom; |
| 3701 } | 3681 } |
| 3702 RegExpTree* atom = new RegExpBackreference(capture); | 3682 RegExpTree* atom = new RegExpBackreference(capture); |
| 3703 builder.AddAtom(atom); | 3683 builder.AddAtom(atom); |
| 3704 goto has_read_atom; // Avoid setting has_character_escapes_. | 3684 goto has_read_atom; // Avoid setting has_character_escapes_. |
| 3705 } | 3685 } |
| 3706 uc32 first_digit = next(); | 3686 uc32 first_digit = Next(); |
| 3707 if (first_digit == '8' || first_digit == '9') { | 3687 if (first_digit == '8' || first_digit == '9') { |
| 3708 // Treat as identity escape | 3688 // Treat as identity escape |
| 3709 builder.AddCharacter(first_digit); | 3689 builder.AddCharacter(first_digit); |
| 3710 Advance(2); | 3690 Advance(2); |
| 3711 break; | 3691 break; |
| 3712 } | 3692 } |
| 3713 } | 3693 } |
| 3714 // FALLTHROUGH | 3694 // FALLTHROUGH |
| 3715 case '0': { | 3695 case '0': { |
| 3716 Advance(); | 3696 Advance(); |
| (...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3761 uc32 value; | 3741 uc32 value; |
| 3762 if (ParseHexEscape(4, &value)) { | 3742 if (ParseHexEscape(4, &value)) { |
| 3763 builder.AddCharacter(value); | 3743 builder.AddCharacter(value); |
| 3764 } else { | 3744 } else { |
| 3765 builder.AddCharacter('u'); | 3745 builder.AddCharacter('u'); |
| 3766 } | 3746 } |
| 3767 break; | 3747 break; |
| 3768 } | 3748 } |
| 3769 default: | 3749 default: |
| 3770 // Identity escape. | 3750 // Identity escape. |
| 3771 builder.AddCharacter(next()); | 3751 builder.AddCharacter(Next()); |
| 3772 Advance(2); | 3752 Advance(2); |
| 3773 break; | 3753 break; |
| 3774 } | 3754 } |
| 3775 has_character_escapes_ = true; | 3755 has_character_escapes_ = true; |
| 3776 break; | 3756 break; |
| 3777 default: | 3757 default: |
| 3778 builder.AddCharacter(current()); | 3758 builder.AddCharacter(current()); |
| 3779 Advance(); | 3759 Advance(); |
| 3780 break; | 3760 break; |
| 3781 } // end switch(current()) | 3761 } // end switch(current()) |
| (...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3854 return true; | 3834 return true; |
| 3855 default: | 3835 default: |
| 3856 return false; | 3836 return false; |
| 3857 } | 3837 } |
| 3858 } | 3838 } |
| 3859 #endif | 3839 #endif |
| 3860 | 3840 |
| 3861 | 3841 |
| 3862 bool RegExpParser::ParseBackreferenceIndex(int* index_out) { | 3842 bool RegExpParser::ParseBackreferenceIndex(int* index_out) { |
| 3863 ASSERT_EQ('\\', current()); | 3843 ASSERT_EQ('\\', current()); |
| 3864 ASSERT('1' <= next() && next() <= '9'); | 3844 ASSERT('1' <= Next() && Next() <= '9'); |
| 3865 ASSERT_EQ(0, pushback_count_); | |
| 3866 // Try to parse a decimal literal that is no greater than the number | 3845 // Try to parse a decimal literal that is no greater than the number |
| 3867 // of previously encountered left capturing parentheses. | 3846 // of previously encountered left capturing parentheses. |
| 3868 // This is not according to the ECMAScript specification. According to | 3847 // This is not according to the ECMAScript specification. According to |
| 3869 // that, one must accept values up to the total number of left capturing | 3848 // that, one must accept values up to the total number of left capturing |
| 3870 // parentheses in the entire input, even if they are meaningless. | 3849 // parentheses in the entire input, even if they are meaningless. |
| 3871 if (captures_ == NULL) | 3850 if (captures_ == NULL) |
| 3872 return false; | 3851 return false; |
| 3873 int value = next() - '0'; | 3852 int start = position(); |
| 3853 int value = Next() - '0'; |
| 3874 if (value > captures_->length()) | 3854 if (value > captures_->length()) |
| 3875 return false; | 3855 return false; |
| 3876 static const int kMaxChars = kMaxPushback - 2; | |
| 3877 EmbeddedVector<uc32, kMaxChars> chars_seen; | |
| 3878 chars_seen[0] = next(); | |
| 3879 int char_count = 1; | |
| 3880 Advance(2); | 3856 Advance(2); |
| 3881 while (true) { | 3857 while (true) { |
| 3882 uc32 c = current(); | 3858 uc32 c = current(); |
| 3883 if (IsDecimalDigit(c)) { | 3859 if (IsDecimalDigit(c)) { |
| 3884 value = 10 * value + (c - '0'); | 3860 value = 10 * value + (c - '0'); |
| 3885 // To avoid reading past the end of the stack-allocated pushback | 3861 if (value > captures_->length()) { |
| 3886 // buffers we only read kMaxChars before giving up. | 3862 Reset(start); |
| 3887 if (value > captures_->length() || char_count > kMaxChars) { | |
| 3888 // If we give up we have to push the characters we read back | |
| 3889 // onto the pushback buffer in the reverse order. | |
| 3890 for (int i = 0; i < char_count; i++) { | |
| 3891 PushBack(chars_seen[char_count - i - 1]); | |
| 3892 } | |
| 3893 PushBack('\\'); | |
| 3894 return false; | 3863 return false; |
| 3895 } | 3864 } |
| 3896 chars_seen[char_count++] = current(); | |
| 3897 Advance(); | 3865 Advance(); |
| 3898 } else { | 3866 } else { |
| 3899 break; | 3867 break; |
| 3900 } | 3868 } |
| 3901 } | 3869 } |
| 3902 *index_out = value; | 3870 *index_out = value; |
| 3903 return true; | 3871 return true; |
| 3904 } | 3872 } |
| 3905 | 3873 |
| 3906 | 3874 |
| (...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3985 if (value < 32 && '0' <= current() && current() <= '7') { | 3953 if (value < 32 && '0' <= current() && current() <= '7') { |
| 3986 value = value * 8 + current() - '0'; | 3954 value = value * 8 + current() - '0'; |
| 3987 Advance(); | 3955 Advance(); |
| 3988 } | 3956 } |
| 3989 } | 3957 } |
| 3990 return value; | 3958 return value; |
| 3991 } | 3959 } |
| 3992 | 3960 |
| 3993 | 3961 |
| 3994 bool RegExpParser::ParseHexEscape(int length, uc32 *value) { | 3962 bool RegExpParser::ParseHexEscape(int length, uc32 *value) { |
| 3995 static const int kMaxChars = kMaxPushback; | 3963 int start = position(); |
| 3996 EmbeddedVector<uc32, kMaxChars> chars_seen; | |
| 3997 ASSERT(length <= kMaxChars); | |
| 3998 uc32 val = 0; | 3964 uc32 val = 0; |
| 3999 bool done = false; | 3965 bool done = false; |
| 4000 for (int i = 0; !done; i++) { | 3966 for (int i = 0; !done; i++) { |
| 4001 uc32 c = current(); | 3967 uc32 c = current(); |
| 4002 int d = HexValue(c); | 3968 int d = HexValue(c); |
| 4003 if (d < 0) { | 3969 if (d < 0) { |
| 4004 while (i > 0) { | 3970 Reset(start); |
| 4005 i--; | |
| 4006 PushBack(chars_seen[i]); | |
| 4007 } | |
| 4008 return false; | 3971 return false; |
| 4009 } | 3972 } |
| 4010 val = val * 16 + d; | 3973 val = val * 16 + d; |
| 4011 Advance(); | 3974 Advance(); |
| 4012 if (i < length - 1) { | 3975 if (i == length - 1) { |
| 4013 chars_seen[i] = c; | |
| 4014 } else { | |
| 4015 done = true; | 3976 done = true; |
| 4016 } | 3977 } |
| 4017 } | 3978 } |
| 4018 *value = val; | 3979 *value = val; |
| 4019 return true; | 3980 return true; |
| 4020 } | 3981 } |
| 4021 | 3982 |
| 4022 | 3983 |
| 4023 uc32 RegExpParser::ParseClassCharacterEscape(bool* ok) { | 3984 uc32 RegExpParser::ParseClassCharacterEscape(bool* ok) { |
| 4024 ASSERT(current() == '\\'); | 3985 ASSERT(current() == '\\'); |
| 4025 ASSERT(has_next() && !IsSpecialClassEscape(next())); | 3986 ASSERT(has_next() && !IsSpecialClassEscape(Next())); |
| 4026 Advance(); | 3987 Advance(); |
| 4027 switch (current()) { | 3988 switch (current()) { |
| 4028 // ControlEscape :: one of | 3989 // ControlEscape :: one of |
| 4029 // f n r t v | 3990 // f n r t v |
| 4030 case 'f': | 3991 case 'f': |
| 4031 Advance(); | 3992 Advance(); |
| 4032 return '\f'; | 3993 return '\f'; |
| 4033 case 'n': | 3994 case 'n': |
| 4034 Advance(); | 3995 Advance(); |
| 4035 return '\n'; | 3996 return '\n'; |
| (...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4081 } | 4042 } |
| 4082 return 0; | 4043 return 0; |
| 4083 } | 4044 } |
| 4084 | 4045 |
| 4085 | 4046 |
| 4086 RegExpTree* RegExpParser::ParseGroup(bool* ok) { | 4047 RegExpTree* RegExpParser::ParseGroup(bool* ok) { |
| 4087 ASSERT_EQ(current(), '('); | 4048 ASSERT_EQ(current(), '('); |
| 4088 char type = '('; | 4049 char type = '('; |
| 4089 Advance(); | 4050 Advance(); |
| 4090 if (current() == '?') { | 4051 if (current() == '?') { |
| 4091 switch (next()) { | 4052 switch (Next()) { |
| 4092 case ':': case '=': case '!': | 4053 case ':': case '=': case '!': |
| 4093 type = next(); | 4054 type = Next(); |
| 4094 Advance(2); | 4055 Advance(2); |
| 4095 break; | 4056 break; |
| 4096 default: | 4057 default: |
| 4097 ReportError(CStrVector("Invalid group"), CHECK_OK); | 4058 ReportError(CStrVector("Invalid group"), CHECK_OK); |
| 4098 break; | 4059 break; |
| 4099 } | 4060 } |
| 4100 } else { | 4061 } else { |
| 4101 if (captures_ == NULL) { | 4062 if (captures_ == NULL) { |
| 4102 captures_ = new ZoneList<RegExpCapture*>(2); | 4063 captures_ = new ZoneList<RegExpCapture*>(2); |
| 4103 } | 4064 } |
| (...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4143 } | 4104 } |
| 4144 } | 4105 } |
| 4145 | 4106 |
| 4146 | 4107 |
| 4147 CharacterRange RegExpParser::ParseClassAtom(bool* is_char_class, | 4108 CharacterRange RegExpParser::ParseClassAtom(bool* is_char_class, |
| 4148 ZoneList<CharacterRange>* ranges, | 4109 ZoneList<CharacterRange>* ranges, |
| 4149 bool* ok) { | 4110 bool* ok) { |
| 4150 ASSERT_EQ(false, *is_char_class); | 4111 ASSERT_EQ(false, *is_char_class); |
| 4151 uc32 first = current(); | 4112 uc32 first = current(); |
| 4152 if (first == '\\') { | 4113 if (first == '\\') { |
| 4153 switch (next()) { | 4114 switch (Next()) { |
| 4154 case 'w': case 'W': case 'd': case 'D': case 's': case 'S': { | 4115 case 'w': case 'W': case 'd': case 'D': case 's': case 'S': { |
| 4155 *is_char_class = true; | 4116 *is_char_class = true; |
| 4156 uc32 c = next(); | 4117 uc32 c = Next(); |
| 4157 CharacterRange::AddClassEscape(c, ranges); | 4118 CharacterRange::AddClassEscape(c, ranges); |
| 4158 Advance(2); | 4119 Advance(2); |
| 4159 return NULL; | 4120 return NULL; |
| 4160 } | 4121 } |
| 4161 default: | 4122 default: |
| 4162 uc32 c = ParseClassCharacterEscape(CHECK_OK); | 4123 uc32 c = ParseClassCharacterEscape(CHECK_OK); |
| 4163 return CharacterRange::Singleton(c); | 4124 return CharacterRange::Singleton(c); |
| 4164 } | 4125 } |
| 4165 } else { | 4126 } else { |
| 4166 Advance(); | 4127 Advance(); |
| (...skipping 93 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4260 PreParser parser(no_script, allow_natives_syntax, extension); | 4221 PreParser parser(no_script, allow_natives_syntax, extension); |
| 4261 if (!parser.PreParseProgram(stream)) return NULL; | 4222 if (!parser.PreParseProgram(stream)) return NULL; |
| 4262 // The list owns the backing store so we need to clone the vector. | 4223 // The list owns the backing store so we need to clone the vector. |
| 4263 // That way, the result will be exactly the right size rather than | 4224 // That way, the result will be exactly the right size rather than |
| 4264 // the expected 50% too large. | 4225 // the expected 50% too large. |
| 4265 Vector<unsigned> store = parser.recorder()->store()->ToVector().Clone(); | 4226 Vector<unsigned> store = parser.recorder()->store()->ToVector().Clone(); |
| 4266 return new ScriptDataImpl(store); | 4227 return new ScriptDataImpl(store); |
| 4267 } | 4228 } |
| 4268 | 4229 |
| 4269 | 4230 |
| 4270 bool ParseRegExp(unibrow::CharacterStream* stream, RegExpParseResult* result) { | 4231 bool ParseRegExp(FlatStringReader* input, RegExpParseResult* result) { |
| 4271 ASSERT(result != NULL); | 4232 ASSERT(result != NULL); |
| 4272 // Get multiline flag somehow | 4233 // Get multiline flag somehow |
| 4273 RegExpParser parser(stream, &result->error, false); | 4234 RegExpParser parser(input, &result->error, false); |
| 4274 bool ok = true; | 4235 bool ok = true; |
| 4275 result->tree = parser.ParsePattern(&ok); | 4236 result->tree = parser.ParsePattern(&ok); |
| 4276 if (!ok) { | 4237 if (!ok) { |
| 4277 ASSERT(result->tree == NULL); | 4238 ASSERT(result->tree == NULL); |
| 4278 ASSERT(!result->error.is_null()); | 4239 ASSERT(!result->error.is_null()); |
| 4279 } else { | 4240 } else { |
| 4280 ASSERT(result->tree != NULL); | 4241 ASSERT(result->tree != NULL); |
| 4281 ASSERT(result->error.is_null()); | 4242 ASSERT(result->error.is_null()); |
| 4282 } | 4243 } |
| 4283 if (ok) { | 4244 if (ok) { |
| (...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4333 start_position, | 4294 start_position, |
| 4334 is_expression); | 4295 is_expression); |
| 4335 return result; | 4296 return result; |
| 4336 } | 4297 } |
| 4337 | 4298 |
| 4338 | 4299 |
| 4339 #undef NEW | 4300 #undef NEW |
| 4340 | 4301 |
| 4341 | 4302 |
| 4342 } } // namespace v8::internal | 4303 } } // namespace v8::internal |
| OLD | NEW |