Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 497 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 508 // must not be 'b' or 'B' since they are usually handle specially. | 508 // must not be 'b' or 'B' since they are usually handle specially. |
| 509 uc32 ParseClassCharacterEscape(bool* ok); | 509 uc32 ParseClassCharacterEscape(bool* ok); |
| 510 | 510 |
| 511 // Checks whether the following is a length-digit hexadecimal number, | 511 // Checks whether the following is a length-digit hexadecimal number, |
| 512 // and sets the value if it is. | 512 // and sets the value if it is. |
| 513 bool ParseHexEscape(int length, uc32* value); | 513 bool ParseHexEscape(int length, uc32* value); |
| 514 | 514 |
| 515 uc32 ParseControlLetterEscape(bool* ok); | 515 uc32 ParseControlLetterEscape(bool* ok); |
| 516 uc32 ParseOctalLiteral(); | 516 uc32 ParseOctalLiteral(); |
| 517 | 517 |
| 518 // Tries to parse the input as a backreference. If successful it | 518 // Tries to parse the input as a back reference. If successful it |
| 519 // stores the result in the output parameter and returns true. If | 519 // stores the result in the output parameter and returns true. If |
| 520 // it fails it will push back the characters read so the same characters | 520 // it fails it will push back the characters read so the same characters |
| 521 // can be reparsed. | 521 // can be reparsed. |
| 522 bool ParseBackreferenceIndex(int* index_out); | 522 bool ParseBackReferenceIndex(int* index_out); |
| 523 | 523 |
| 524 CharacterRange ParseClassAtom(bool* is_char_class, | 524 CharacterRange ParseClassAtom(bool* is_char_class, |
| 525 ZoneList<CharacterRange>* ranges, | 525 ZoneList<CharacterRange>* ranges, |
| 526 bool* ok); | 526 bool* ok); |
| 527 RegExpTree* ReportError(Vector<const char> message, bool* ok); | 527 RegExpTree* ReportError(Vector<const char> message, bool* ok); |
| 528 void Advance(); | 528 void Advance(); |
| 529 void Advance(int dist); | 529 void Advance(int dist); |
| 530 void Reset(int pos); | 530 void Reset(int pos); |
| 531 | 531 |
| 532 bool HasCharacterEscapes(); | 532 bool HasCharacterEscapes(); |
| 533 | 533 |
| 534 int captures_started() { return captures_ == NULL ? 0 : captures_->length(); } | 534 int captures_started() { return captures_ == NULL ? 0 : captures_->length(); } |
| 535 int position() { return next_pos_ - 1; } | 535 int position() { return next_pos_ - 1; } |
| 536 | 536 |
| 537 static const uc32 kEndMarker = (1 << 21); | 537 static const uc32 kEndMarker = (1 << 21); |
| 538 private: | 538 private: |
| 539 uc32 current() { return current_; } | 539 uc32 current() { return current_; } |
| 540 bool has_more() { return has_more_; } | 540 bool has_more() { return has_more_; } |
| 541 bool has_next() { return next_pos_ < in()->length(); } | 541 bool has_next() { return next_pos_ < in()->length(); } |
| 542 uc32 Next(); | 542 uc32 Next(); |
| 543 FlatStringReader* in() { return in_; } | 543 FlatStringReader* in() { return in_; } |
| 544 void ScanForCaptures(); | |
| 545 bool CaptureAvailable(int index); | |
| 544 uc32 current_; | 546 uc32 current_; |
| 545 bool has_more_; | 547 bool has_more_; |
| 546 bool multiline_mode_; | 548 bool multiline_mode_; |
| 547 int next_pos_; | 549 int next_pos_; |
| 548 FlatStringReader* in_; | 550 FlatStringReader* in_; |
| 549 Handle<String>* error_; | 551 Handle<String>* error_; |
| 550 bool has_character_escapes_; | 552 bool has_character_escapes_; |
| 553 bool is_scanned_for_captures_; | |
| 551 ZoneList<RegExpCapture*>* captures_; | 554 ZoneList<RegExpCapture*>* captures_; |
| 555 int capture_count_; | |
|
Christian Plesner Hansen
2008/11/21 11:16:39
This name could be misleading but I can't think of
| |
| 552 }; | 556 }; |
| 553 | 557 |
| 554 | 558 |
| 555 // A temporary scope stores information during parsing, just like | 559 // A temporary scope stores information during parsing, just like |
| 556 // a plain scope. However, temporary scopes are not kept around | 560 // a plain scope. However, temporary scopes are not kept around |
| 557 // after parsing or referenced by syntax trees so they can be stack- | 561 // after parsing or referenced by syntax trees so they can be stack- |
| 558 // allocated and hence used by the pre-parser. | 562 // allocated and hence used by the pre-parser. |
| 559 class TemporaryScope BASE_EMBEDDED { | 563 class TemporaryScope BASE_EMBEDDED { |
| 560 public: | 564 public: |
| 561 explicit TemporaryScope(Parser* parser); | 565 explicit TemporaryScope(Parser* parser); |
| (...skipping 2933 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3495 RegExpParser::RegExpParser(FlatStringReader* in, | 3499 RegExpParser::RegExpParser(FlatStringReader* in, |
| 3496 Handle<String>* error, | 3500 Handle<String>* error, |
| 3497 bool multiline_mode) | 3501 bool multiline_mode) |
| 3498 : current_(kEndMarker), | 3502 : current_(kEndMarker), |
| 3499 has_more_(true), | 3503 has_more_(true), |
| 3500 multiline_mode_(multiline_mode), | 3504 multiline_mode_(multiline_mode), |
| 3501 next_pos_(0), | 3505 next_pos_(0), |
| 3502 in_(in), | 3506 in_(in), |
| 3503 error_(error), | 3507 error_(error), |
| 3504 has_character_escapes_(false), | 3508 has_character_escapes_(false), |
| 3505 captures_(NULL) { | 3509 is_scanned_for_captures_(false), |
| 3510 captures_(NULL), | |
| 3511 capture_count_(0) { | |
| 3506 Advance(1); | 3512 Advance(1); |
| 3507 } | 3513 } |
| 3508 | 3514 |
| 3509 | 3515 |
| 3510 uc32 RegExpParser::Next() { | 3516 uc32 RegExpParser::Next() { |
| 3511 if (has_next()) { | 3517 if (has_next()) { |
| 3512 return in()->Get(next_pos_); | 3518 return in()->Get(next_pos_); |
| 3513 } else { | 3519 } else { |
| 3514 return kEndMarker; | 3520 return kEndMarker; |
| 3515 } | 3521 } |
| (...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3557 // Disjunction | 3563 // Disjunction |
| 3558 RegExpTree* RegExpParser::ParsePattern(bool* ok) { | 3564 RegExpTree* RegExpParser::ParsePattern(bool* ok) { |
| 3559 RegExpTree* result = ParseDisjunction(CHECK_OK); | 3565 RegExpTree* result = ParseDisjunction(CHECK_OK); |
| 3560 if (has_more()) { | 3566 if (has_more()) { |
| 3561 ReportError(CStrVector("Unmatched ')'"), CHECK_OK); | 3567 ReportError(CStrVector("Unmatched ')'"), CHECK_OK); |
| 3562 } | 3568 } |
| 3563 return result; | 3569 return result; |
| 3564 } | 3570 } |
| 3565 | 3571 |
| 3566 | 3572 |
| 3573 bool RegExpParser::CaptureAvailable(int index) { | |
| 3574 if (captures_ == NULL) return false; | |
| 3575 if (index >= captures_->length()) return false; | |
| 3576 RegExpCapture* capture = captures_->at(index); | |
| 3577 return capture != NULL && capture->available() == CAPTURE_AVAILABLE; | |
| 3578 } | |
| 3579 | |
| 3580 | |
| 3567 // Disjunction :: | 3581 // Disjunction :: |
| 3568 // Alternative | 3582 // Alternative |
| 3569 // Alternative | Disjunction | 3583 // Alternative | Disjunction |
| 3570 // Alternative :: | 3584 // Alternative :: |
| 3571 // [empty] | 3585 // [empty] |
| 3572 // Term Alternative | 3586 // Term Alternative |
| 3573 // Term :: | 3587 // Term :: |
| 3574 // Assertion | 3588 // Assertion |
| 3575 // Atom | 3589 // Atom |
| 3576 // Atom Quantifier | 3590 // Atom Quantifier |
| (...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3660 Advance(2); | 3674 Advance(2); |
| 3661 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2); | 3675 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2); |
| 3662 CharacterRange::AddClassEscape(c, ranges); | 3676 CharacterRange::AddClassEscape(c, ranges); |
| 3663 RegExpTree* atom = new RegExpCharacterClass(ranges, false); | 3677 RegExpTree* atom = new RegExpCharacterClass(ranges, false); |
| 3664 builder.AddAtom(atom); | 3678 builder.AddAtom(atom); |
| 3665 goto has_read_atom; // Avoid setting has_character_escapes_. | 3679 goto has_read_atom; // Avoid setting has_character_escapes_. |
| 3666 } | 3680 } |
| 3667 case '1': case '2': case '3': case '4': case '5': case '6': | 3681 case '1': case '2': case '3': case '4': case '5': case '6': |
| 3668 case '7': case '8': case '9': { | 3682 case '7': case '8': case '9': { |
| 3669 int index = 0; | 3683 int index = 0; |
| 3670 if (ParseBackreferenceIndex(&index)) { | 3684 if (ParseBackReferenceIndex(&index)) { |
| 3671 RegExpCapture* capture = captures_->at(index - 1); | 3685 if (!CaptureAvailable(index - 1)) { |
| 3672 if (capture == NULL || capture->available() != CAPTURE_AVAILABLE) { | |
| 3673 // Prepare to ignore a following quantifier | 3686 // Prepare to ignore a following quantifier |
| 3674 builder.AddEmpty(); | 3687 builder.AddEmpty(); |
| 3675 goto has_read_atom; | 3688 goto has_read_atom; |
| 3676 } | 3689 } |
| 3677 RegExpTree* atom = new RegExpBackreference(capture); | 3690 RegExpCapture* capture = captures_->at(index - 1); |
| 3691 RegExpTree* atom = new RegExpBackReference(capture); | |
| 3678 builder.AddAtom(atom); | 3692 builder.AddAtom(atom); |
| 3679 goto has_read_atom; // Avoid setting has_character_escapes_. | 3693 goto has_read_atom; // Avoid setting has_character_escapes_. |
| 3680 } | 3694 } |
| 3681 uc32 first_digit = Next(); | 3695 uc32 first_digit = Next(); |
| 3682 if (first_digit == '8' || first_digit == '9') { | 3696 if (first_digit == '8' || first_digit == '9') { |
| 3683 // Treat as identity escape | 3697 // Treat as identity escape |
| 3684 builder.AddCharacter(first_digit); | 3698 builder.AddCharacter(first_digit); |
| 3685 Advance(2); | 3699 Advance(2); |
| 3686 break; | 3700 break; |
| 3687 } | 3701 } |
| (...skipping 149 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3837 case 's': case 'S': | 3851 case 's': case 'S': |
| 3838 case 'w': case 'W': | 3852 case 'w': case 'W': |
| 3839 return true; | 3853 return true; |
| 3840 default: | 3854 default: |
| 3841 return false; | 3855 return false; |
| 3842 } | 3856 } |
| 3843 } | 3857 } |
| 3844 #endif | 3858 #endif |
| 3845 | 3859 |
| 3846 | 3860 |
| 3847 bool RegExpParser::ParseBackreferenceIndex(int* index_out) { | 3861 // In order to know whether an escape is a backreference or not we have to scan |
| 3862 // the entire regexp and find the number of capturing parentheses. However we | |
| 3863 // don't want to scan the regexp twice unless it is necessary. This mini-parser | |
| 3864 // is called when needed. It can see the difference between capturing and | |
| 3865 // noncapturing parentheses and can skip character classes and backslash-escaped | |
| 3866 // characters. | |
| 3867 void RegExpParser::ScanForCaptures() { | |
| 3868 int n; | |
| 3869 while ((n = current()) != kEndMarker) { | |
| 3870 Advance(); | |
| 3871 switch (n) { | |
| 3872 case '\\': | |
| 3873 Advance(); | |
| 3874 break; | |
| 3875 case '[': { | |
| 3876 int c; | |
| 3877 while ((c = current()) != kEndMarker) { | |
| 3878 Advance(); | |
| 3879 if (c == '\\') { | |
| 3880 Advance(); | |
| 3881 } else { | |
| 3882 if (c == ']') break; | |
| 3883 } | |
| 3884 } | |
| 3885 break; | |
| 3886 } | |
| 3887 case '(': | |
| 3888 if (current() != '?') capture_count_++; | |
| 3889 break; | |
| 3890 } | |
| 3891 } | |
| 3892 is_scanned_for_captures_ = true; | |
| 3893 } | |
| 3894 | |
| 3895 | |
| 3896 bool RegExpParser::ParseBackReferenceIndex(int* index_out) { | |
| 3848 ASSERT_EQ('\\', current()); | 3897 ASSERT_EQ('\\', current()); |
| 3849 ASSERT('1' <= Next() && Next() <= '9'); | 3898 ASSERT('1' <= Next() && Next() <= '9'); |
| 3850 // Try to parse a decimal literal that is no greater than the number | 3899 // Try to parse a decimal literal that is no greater than the number |
| 3851 // of previously encountered left capturing parentheses. | 3900 // of previously encountered left capturing parentheses. |
| 3852 // This is a not according the the ECMAScript specification. According to | 3901 // This is a not according the the ECMAScript specification. According to |
| 3853 // that, one must accept values up to the total number of left capturing | 3902 // that, one must accept values up to the total number of left capturing |
| 3854 // parentheses in the entire input, even if they are meaningless. | 3903 // parentheses in the entire input, even if they are meaningless. |
| 3855 if (captures_ == NULL) | 3904 if (!is_scanned_for_captures_) { |
| 3856 return false; | 3905 int saved_position = position(); |
| 3906 ScanForCaptures(); | |
| 3907 Reset(saved_position); | |
| 3908 } | |
| 3909 if (capture_count_ == 0) return false; | |
| 3857 int start = position(); | 3910 int start = position(); |
| 3858 int value = Next() - '0'; | 3911 int value = Next() - '0'; |
| 3859 if (value > captures_->length()) | 3912 if (value > capture_count_) return false; |
| 3860 return false; | |
| 3861 Advance(2); | 3913 Advance(2); |
| 3862 while (true) { | 3914 while (true) { |
| 3863 uc32 c = current(); | 3915 uc32 c = current(); |
| 3864 if (IsDecimalDigit(c)) { | 3916 if (IsDecimalDigit(c)) { |
| 3865 value = 10 * value + (c - '0'); | 3917 value = 10 * value + (c - '0'); |
| 3866 if (value > captures_->length()) { | 3918 if (value > capture_count_) { |
| 3867 Reset(start); | 3919 Reset(start); |
| 3868 return false; | 3920 return false; |
| 3869 } | 3921 } |
| 3870 Advance(); | 3922 Advance(); |
| 3871 } else { | 3923 } else { |
| 3872 break; | 3924 break; |
| 3873 } | 3925 } |
| 3874 } | 3926 } |
| 3875 *index_out = value; | 3927 *index_out = value; |
| 3876 return true; | 3928 return true; |
| (...skipping 184 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 4061 break; | 4113 break; |
| 4062 default: | 4114 default: |
| 4063 ReportError(CStrVector("Invalid group"), CHECK_OK); | 4115 ReportError(CStrVector("Invalid group"), CHECK_OK); |
| 4064 break; | 4116 break; |
| 4065 } | 4117 } |
| 4066 } else { | 4118 } else { |
| 4067 if (captures_ == NULL) { | 4119 if (captures_ == NULL) { |
| 4068 captures_ = new ZoneList<RegExpCapture*>(2); | 4120 captures_ = new ZoneList<RegExpCapture*>(2); |
| 4069 } | 4121 } |
| 4070 captures_->Add(NULL); | 4122 captures_->Add(NULL); |
| 4123 if (!is_scanned_for_captures_) capture_count_++; | |
| 4071 } | 4124 } |
| 4072 int capture_index = captures_started(); | 4125 int capture_index = captures_started(); |
| 4073 RegExpTree* body = ParseDisjunction(CHECK_OK); | 4126 RegExpTree* body = ParseDisjunction(CHECK_OK); |
| 4074 if (current() != ')') { | 4127 if (current() != ')') { |
| 4075 ReportError(CStrVector("Unterminated group"), CHECK_OK); | 4128 ReportError(CStrVector("Unterminated group"), CHECK_OK); |
| 4076 } | 4129 } |
| 4077 Advance(); | 4130 Advance(); |
| 4078 | 4131 |
| 4079 int end_capture_index = captures_started(); | 4132 int end_capture_index = captures_started(); |
| 4080 if (type == '!') { | 4133 if (type == '!') { |
| (...skipping 222 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 4303 start_position, | 4356 start_position, |
| 4304 is_expression); | 4357 is_expression); |
| 4305 return result; | 4358 return result; |
| 4306 } | 4359 } |
| 4307 | 4360 |
| 4308 | 4361 |
| 4309 #undef NEW | 4362 #undef NEW |
| 4310 | 4363 |
| 4311 | 4364 |
| 4312 } } // namespace v8::internal | 4365 } } // namespace v8::internal |
| OLD | NEW |