| OLD | NEW |
| 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 532 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 543 FlatStringReader* in() { return in_; } | 543 FlatStringReader* in() { return in_; } |
| 544 void ScanForCaptures(); | 544 void ScanForCaptures(); |
| 545 bool CaptureAvailable(int index); | 545 bool CaptureAvailable(int index); |
| 546 uc32 current_; | 546 uc32 current_; |
| 547 bool has_more_; | 547 bool has_more_; |
| 548 bool multiline_mode_; | 548 bool multiline_mode_; |
| 549 int next_pos_; | 549 int next_pos_; |
| 550 FlatStringReader* in_; | 550 FlatStringReader* in_; |
| 551 Handle<String>* error_; | 551 Handle<String>* error_; |
| 552 bool has_character_escapes_; | 552 bool has_character_escapes_; |
| 553 ZoneList<RegExpCapture*>* captures_; |
| 553 bool is_scanned_for_captures_; | 554 bool is_scanned_for_captures_; |
| 554 ZoneList<RegExpCapture*>* captures_; | 555 // The capture count is only valid after we have scanned for captures. |
| 555 int capture_count_; | 556 int capture_count_; |
| 556 }; | 557 }; |
| 557 | 558 |
| 558 | 559 |
| 559 // A temporary scope stores information during parsing, just like | 560 // A temporary scope stores information during parsing, just like |
| 560 // a plain scope. However, temporary scopes are not kept around | 561 // a plain scope. However, temporary scopes are not kept around |
| 561 // after parsing or referenced by syntax trees so they can be stack- | 562 // after parsing or referenced by syntax trees so they can be stack- |
| 562 // allocated and hence used by the pre-parser. | 563 // allocated and hence used by the pre-parser. |
| 563 class TemporaryScope BASE_EMBEDDED { | 564 class TemporaryScope BASE_EMBEDDED { |
| 564 public: | 565 public: |
| (...skipping 2934 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3499 RegExpParser::RegExpParser(FlatStringReader* in, | 3500 RegExpParser::RegExpParser(FlatStringReader* in, |
| 3500 Handle<String>* error, | 3501 Handle<String>* error, |
| 3501 bool multiline_mode) | 3502 bool multiline_mode) |
| 3502 : current_(kEndMarker), | 3503 : current_(kEndMarker), |
| 3503 has_more_(true), | 3504 has_more_(true), |
| 3504 multiline_mode_(multiline_mode), | 3505 multiline_mode_(multiline_mode), |
| 3505 next_pos_(0), | 3506 next_pos_(0), |
| 3506 in_(in), | 3507 in_(in), |
| 3507 error_(error), | 3508 error_(error), |
| 3508 has_character_escapes_(false), | 3509 has_character_escapes_(false), |
| 3510 captures_(NULL), |
| 3509 is_scanned_for_captures_(false), | 3511 is_scanned_for_captures_(false), |
| 3510 captures_(NULL), | |
| 3511 capture_count_(0) { | 3512 capture_count_(0) { |
| 3512 Advance(1); | 3513 Advance(1); |
| 3513 } | 3514 } |
| 3514 | 3515 |
| 3515 | 3516 |
| 3516 uc32 RegExpParser::Next() { | 3517 uc32 RegExpParser::Next() { |
| 3517 if (has_next()) { | 3518 if (has_next()) { |
| 3518 return in()->Get(next_pos_); | 3519 return in()->Get(next_pos_); |
| 3519 } else { | 3520 } else { |
| 3520 return kEndMarker; | 3521 return kEndMarker; |
| (...skipping 337 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3858 #endif | 3859 #endif |
| 3859 | 3860 |
| 3860 | 3861 |
| 3861 // In order to know whether an escape is a backreference or not we have to scan | 3862 // In order to know whether an escape is a backreference or not we have to scan |
| 3862 // the entire regexp and find the number of capturing parentheses. However we | 3863 // the entire regexp and find the number of capturing parentheses. However we |
| 3863 // don't want to scan the regexp twice unless it is necessary. This mini-parser | 3864 // don't want to scan the regexp twice unless it is necessary. This mini-parser |
| 3864 // is called when needed. It can see the difference between capturing and | 3865 // is called when needed. It can see the difference between capturing and |
| 3865 // noncapturing parentheses and can skip character classes and backslash-escaped | 3866 // noncapturing parentheses and can skip character classes and backslash-escaped |
| 3866 // characters. | 3867 // characters. |
| 3867 void RegExpParser::ScanForCaptures() { | 3868 void RegExpParser::ScanForCaptures() { |
| 3869 // Start with the captures started before the current position. |
| 3870 int capture_count = captures_started(); |
| 3871 // Add count of captures after this position. |
| 3868 int n; | 3872 int n; |
| 3869 while ((n = current()) != kEndMarker) { | 3873 while ((n = current()) != kEndMarker) { |
| 3870 Advance(); | 3874 Advance(); |
| 3871 switch (n) { | 3875 switch (n) { |
| 3872 case '\\': | 3876 case '\\': |
| 3873 Advance(); | 3877 Advance(); |
| 3874 break; | 3878 break; |
| 3875 case '[': { | 3879 case '[': { |
| 3876 int c; | 3880 int c; |
| 3877 while ((c = current()) != kEndMarker) { | 3881 while ((c = current()) != kEndMarker) { |
| 3878 Advance(); | 3882 Advance(); |
| 3879 if (c == '\\') { | 3883 if (c == '\\') { |
| 3880 Advance(); | 3884 Advance(); |
| 3881 } else { | 3885 } else { |
| 3882 if (c == ']') break; | 3886 if (c == ']') break; |
| 3883 } | 3887 } |
| 3884 } | 3888 } |
| 3885 break; | 3889 break; |
| 3886 } | 3890 } |
| 3887 case '(': | 3891 case '(': |
| 3888 if (current() != '?') capture_count_++; | 3892 if (current() != '?') capture_count++; |
| 3889 break; | 3893 break; |
| 3890 } | 3894 } |
| 3891 } | 3895 } |
| 3896 capture_count_ = capture_count; |
| 3892 is_scanned_for_captures_ = true; | 3897 is_scanned_for_captures_ = true; |
| 3893 } | 3898 } |
| 3894 | 3899 |
| 3895 | 3900 |
| 3896 bool RegExpParser::ParseBackReferenceIndex(int* index_out) { | 3901 bool RegExpParser::ParseBackReferenceIndex(int* index_out) { |
| 3897 ASSERT_EQ('\\', current()); | 3902 ASSERT_EQ('\\', current()); |
| 3898 ASSERT('1' <= Next() && Next() <= '9'); | 3903 ASSERT('1' <= Next() && Next() <= '9'); |
| 3899 // Try to parse a decimal literal that is no greater than the number | 3904 // Try to parse a decimal literal that is no greater than the number |
| 3900 // of previously encountered left capturing parentheses. | 3905 // of previously encountered left capturing parentheses. |
| 3901 // This is not according to the ECMAScript specification. According to | 3906 // This is not according to the ECMAScript specification. According to |
| 3902 // that, one must accept values up to the total number of left capturing | 3907 // that, one must accept values up to the total number of left capturing |
| 3903 // parentheses in the entire input, even if they are meaningless. | 3908 // parentheses in the entire input, even if they are meaningless. |
| 3904 if (!is_scanned_for_captures_) { | |
| 3905 int saved_position = position(); | |
| 3906 ScanForCaptures(); | |
| 3907 Reset(saved_position); | |
| 3908 } | |
| 3909 if (capture_count_ == 0) return false; | |
| 3910 int start = position(); | 3909 int start = position(); |
| 3911 int value = Next() - '0'; | 3910 int value = Next() - '0'; |
| 3912 if (value > capture_count_) return false; | |
| 3913 Advance(2); | 3911 Advance(2); |
| 3914 while (true) { | 3912 while (true) { |
| 3915 uc32 c = current(); | 3913 uc32 c = current(); |
| 3916 if (IsDecimalDigit(c)) { | 3914 if (IsDecimalDigit(c)) { |
| 3917 value = 10 * value + (c - '0'); | 3915 value = 10 * value + (c - '0'); |
| 3918 if (value > capture_count_) { | |
| 3919 Reset(start); | |
| 3920 return false; | |
| 3921 } | |
| 3922 Advance(); | 3916 Advance(); |
| 3923 } else { | 3917 } else { |
| 3924 break; | 3918 break; |
| 3925 } | 3919 } |
| 3926 } | 3920 } |
| 3921 if (value > captures_started()) { |
| 3922 if (!is_scanned_for_captures_) { |
| 3923 int saved_position = position(); |
| 3924 ScanForCaptures(); |
| 3925 Reset(saved_position); |
| 3926 } |
| 3927 if (value > capture_count_) { |
| 3928 Reset(start); |
| 3929 return false; |
| 3930 } |
| 3931 } |
| 3927 *index_out = value; | 3932 *index_out = value; |
| 3928 return true; | 3933 return true; |
| 3929 } | 3934 } |
| 3930 | 3935 |
| 3931 | 3936 |
| 3932 // QuantifierPrefix :: | 3937 // QuantifierPrefix :: |
| 3933 // { DecimalDigits } | 3938 // { DecimalDigits } |
| 3934 // { DecimalDigits , } | 3939 // { DecimalDigits , } |
| 3935 // { DecimalDigits , DecimalDigits } | 3940 // { DecimalDigits , DecimalDigits } |
| 3936 bool RegExpParser::ParseIntervalQuantifier(int* min_out, int* max_out) { | 3941 bool RegExpParser::ParseIntervalQuantifier(int* min_out, int* max_out) { |
| (...skipping 176 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4113 break; | 4118 break; |
| 4114 default: | 4119 default: |
| 4115 ReportError(CStrVector("Invalid group"), CHECK_OK); | 4120 ReportError(CStrVector("Invalid group"), CHECK_OK); |
| 4116 break; | 4121 break; |
| 4117 } | 4122 } |
| 4118 } else { | 4123 } else { |
| 4119 if (captures_ == NULL) { | 4124 if (captures_ == NULL) { |
| 4120 captures_ = new ZoneList<RegExpCapture*>(2); | 4125 captures_ = new ZoneList<RegExpCapture*>(2); |
| 4121 } | 4126 } |
| 4122 captures_->Add(NULL); | 4127 captures_->Add(NULL); |
| 4123 if (!is_scanned_for_captures_) capture_count_++; | |
| 4124 } | 4128 } |
| 4125 int capture_index = captures_started(); | 4129 int capture_index = captures_started(); |
| 4126 RegExpTree* body = ParseDisjunction(CHECK_OK); | 4130 RegExpTree* body = ParseDisjunction(CHECK_OK); |
| 4127 if (current() != ')') { | 4131 if (current() != ')') { |
| 4128 ReportError(CStrVector("Unterminated group"), CHECK_OK); | 4132 ReportError(CStrVector("Unterminated group"), CHECK_OK); |
| 4129 } | 4133 } |
| 4130 Advance(); | 4134 Advance(); |
| 4131 | 4135 |
| 4132 int end_capture_index = captures_started(); | 4136 int end_capture_index = captures_started(); |
| 4133 if (type == '!') { | 4137 if (type == '!') { |
| (...skipping 222 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4356 start_position, | 4360 start_position, |
| 4357 is_expression); | 4361 is_expression); |
| 4358 return result; | 4362 return result; |
| 4359 } | 4363 } |
| 4360 | 4364 |
| 4361 | 4365 |
| 4362 #undef NEW | 4366 #undef NEW |
| 4363 | 4367 |
| 4364 | 4368 |
| 4365 } } // namespace v8::internal | 4369 } } // namespace v8::internal |
| OLD | NEW |