Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(402)

Side by Side Diff: src/parser.cc

Issue 12634: * Postponed irregexp parser forward scan of capturing parentheses until necessary. (Closed)
Patch Set: Rebase to head of bleeding edge Created 12 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. 1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 532 matching lines...) Expand 10 before | Expand all | Expand 10 after
543 FlatStringReader* in() { return in_; } 543 FlatStringReader* in() { return in_; }
544 void ScanForCaptures(); 544 void ScanForCaptures();
545 bool CaptureAvailable(int index); 545 bool CaptureAvailable(int index);
546 uc32 current_; 546 uc32 current_;
547 bool has_more_; 547 bool has_more_;
548 bool multiline_mode_; 548 bool multiline_mode_;
549 int next_pos_; 549 int next_pos_;
550 FlatStringReader* in_; 550 FlatStringReader* in_;
551 Handle<String>* error_; 551 Handle<String>* error_;
552 bool has_character_escapes_; 552 bool has_character_escapes_;
553 ZoneList<RegExpCapture*>* captures_;
553 bool is_scanned_for_captures_; 554 bool is_scanned_for_captures_;
554 ZoneList<RegExpCapture*>* captures_; 555 // The capture count is only valid after we have scanned for captures.
555 int capture_count_; 556 int capture_count_;
556 }; 557 };
557 558
558 559
559 // A temporary scope stores information during parsing, just like 560 // A temporary scope stores information during parsing, just like
560 // a plain scope. However, temporary scopes are not kept around 561 // a plain scope. However, temporary scopes are not kept around
561 // after parsing or referenced by syntax trees so they can be stack- 562 // after parsing or referenced by syntax trees so they can be stack-
562 // allocated and hence used by the pre-parser. 563 // allocated and hence used by the pre-parser.
563 class TemporaryScope BASE_EMBEDDED { 564 class TemporaryScope BASE_EMBEDDED {
564 public: 565 public:
(...skipping 2934 matching lines...) Expand 10 before | Expand all | Expand 10 after
3499 RegExpParser::RegExpParser(FlatStringReader* in, 3500 RegExpParser::RegExpParser(FlatStringReader* in,
3500 Handle<String>* error, 3501 Handle<String>* error,
3501 bool multiline_mode) 3502 bool multiline_mode)
3502 : current_(kEndMarker), 3503 : current_(kEndMarker),
3503 has_more_(true), 3504 has_more_(true),
3504 multiline_mode_(multiline_mode), 3505 multiline_mode_(multiline_mode),
3505 next_pos_(0), 3506 next_pos_(0),
3506 in_(in), 3507 in_(in),
3507 error_(error), 3508 error_(error),
3508 has_character_escapes_(false), 3509 has_character_escapes_(false),
3510 captures_(NULL),
3509 is_scanned_for_captures_(false), 3511 is_scanned_for_captures_(false),
3510 captures_(NULL),
3511 capture_count_(0) { 3512 capture_count_(0) {
3512 Advance(1); 3513 Advance(1);
3513 } 3514 }
3514 3515
3515 3516
3516 uc32 RegExpParser::Next() { 3517 uc32 RegExpParser::Next() {
3517 if (has_next()) { 3518 if (has_next()) {
3518 return in()->Get(next_pos_); 3519 return in()->Get(next_pos_);
3519 } else { 3520 } else {
3520 return kEndMarker; 3521 return kEndMarker;
(...skipping 337 matching lines...) Expand 10 before | Expand all | Expand 10 after
3858 #endif 3859 #endif
3859 3860
3860 3861
3861 // In order to know whether an escape is a backreference or not we have to scan 3862 // In order to know whether an escape is a backreference or not we have to scan
3862 // the entire regexp and find the number of capturing parentheses. However we 3863 // the entire regexp and find the number of capturing parentheses. However we
3863 // don't want to scan the regexp twice unless it is necessary. This mini-parser 3864 // don't want to scan the regexp twice unless it is necessary. This mini-parser
3864 // is called when needed. It can see the difference between capturing and 3865 // is called when needed. It can see the difference between capturing and
3865 // noncapturing parentheses and can skip character classes and backslash-escaped 3866 // noncapturing parentheses and can skip character classes and backslash-escaped
3866 // characters. 3867 // characters.
3867 void RegExpParser::ScanForCaptures() { 3868 void RegExpParser::ScanForCaptures() {
3869 // Start with the captures started before the current position.
3870 int capture_count = captures_started();
3871 // Add count of captures after this position.
3868 int n; 3872 int n;
3869 while ((n = current()) != kEndMarker) { 3873 while ((n = current()) != kEndMarker) {
3870 Advance(); 3874 Advance();
3871 switch (n) { 3875 switch (n) {
3872 case '\\': 3876 case '\\':
3873 Advance(); 3877 Advance();
3874 break; 3878 break;
3875 case '[': { 3879 case '[': {
3876 int c; 3880 int c;
3877 while ((c = current()) != kEndMarker) { 3881 while ((c = current()) != kEndMarker) {
3878 Advance(); 3882 Advance();
3879 if (c == '\\') { 3883 if (c == '\\') {
3880 Advance(); 3884 Advance();
3881 } else { 3885 } else {
3882 if (c == ']') break; 3886 if (c == ']') break;
3883 } 3887 }
3884 } 3888 }
3885 break; 3889 break;
3886 } 3890 }
3887 case '(': 3891 case '(':
3888 if (current() != '?') capture_count_++; 3892 if (current() != '?') capture_count++;
3889 break; 3893 break;
3890 } 3894 }
3891 } 3895 }
3896 capture_count_ = capture_count;
3892 is_scanned_for_captures_ = true; 3897 is_scanned_for_captures_ = true;
3893 } 3898 }
3894 3899
3895 3900
3896 bool RegExpParser::ParseBackReferenceIndex(int* index_out) { 3901 bool RegExpParser::ParseBackReferenceIndex(int* index_out) {
3897 ASSERT_EQ('\\', current()); 3902 ASSERT_EQ('\\', current());
3898 ASSERT('1' <= Next() && Next() <= '9'); 3903 ASSERT('1' <= Next() && Next() <= '9');
3899 // Try to parse a decimal literal that is no greater than the number 3904 // Try to parse a decimal literal that is no greater than the number
3900 // of previously encountered left capturing parentheses. 3905 // of previously encountered left capturing parentheses.
3901 // This is not according to the ECMAScript specification. According to 3906 // This is not according to the ECMAScript specification. According to
3902 // that, one must accept values up to the total number of left capturing 3907 // that, one must accept values up to the total number of left capturing
3903 // parentheses in the entire input, even if they are meaningless. 3908 // parentheses in the entire input, even if they are meaningless.
3904 if (!is_scanned_for_captures_) {
3905 int saved_position = position();
3906 ScanForCaptures();
3907 Reset(saved_position);
3908 }
3909 if (capture_count_ == 0) return false;
3910 int start = position(); 3909 int start = position();
3911 int value = Next() - '0'; 3910 int value = Next() - '0';
3912 if (value > capture_count_) return false;
3913 Advance(2); 3911 Advance(2);
3914 while (true) { 3912 while (true) {
3915 uc32 c = current(); 3913 uc32 c = current();
3916 if (IsDecimalDigit(c)) { 3914 if (IsDecimalDigit(c)) {
3917 value = 10 * value + (c - '0'); 3915 value = 10 * value + (c - '0');
3918 if (value > capture_count_) {
3919 Reset(start);
3920 return false;
3921 }
3922 Advance(); 3916 Advance();
3923 } else { 3917 } else {
3924 break; 3918 break;
3925 } 3919 }
3926 } 3920 }
3921 if (value > captures_started()) {
3922 if (!is_scanned_for_captures_) {
3923 int saved_position = position();
3924 ScanForCaptures();
3925 Reset(saved_position);
3926 }
3927 if (value > capture_count_) {
3928 Reset(start);
3929 return false;
3930 }
3931 }
3927 *index_out = value; 3932 *index_out = value;
3928 return true; 3933 return true;
3929 } 3934 }
3930 3935
3931 3936
3932 // QuantifierPrefix :: 3937 // QuantifierPrefix ::
3933 // { DecimalDigits } 3938 // { DecimalDigits }
3934 // { DecimalDigits , } 3939 // { DecimalDigits , }
3935 // { DecimalDigits , DecimalDigits } 3940 // { DecimalDigits , DecimalDigits }
3936 bool RegExpParser::ParseIntervalQuantifier(int* min_out, int* max_out) { 3941 bool RegExpParser::ParseIntervalQuantifier(int* min_out, int* max_out) {
(...skipping 176 matching lines...) Expand 10 before | Expand all | Expand 10 after
4113 break; 4118 break;
4114 default: 4119 default:
4115 ReportError(CStrVector("Invalid group"), CHECK_OK); 4120 ReportError(CStrVector("Invalid group"), CHECK_OK);
4116 break; 4121 break;
4117 } 4122 }
4118 } else { 4123 } else {
4119 if (captures_ == NULL) { 4124 if (captures_ == NULL) {
4120 captures_ = new ZoneList<RegExpCapture*>(2); 4125 captures_ = new ZoneList<RegExpCapture*>(2);
4121 } 4126 }
4122 captures_->Add(NULL); 4127 captures_->Add(NULL);
4123 if (!is_scanned_for_captures_) capture_count_++;
4124 } 4128 }
4125 int capture_index = captures_started(); 4129 int capture_index = captures_started();
4126 RegExpTree* body = ParseDisjunction(CHECK_OK); 4130 RegExpTree* body = ParseDisjunction(CHECK_OK);
4127 if (current() != ')') { 4131 if (current() != ')') {
4128 ReportError(CStrVector("Unterminated group"), CHECK_OK); 4132 ReportError(CStrVector("Unterminated group"), CHECK_OK);
4129 } 4133 }
4130 Advance(); 4134 Advance();
4131 4135
4132 int end_capture_index = captures_started(); 4136 int end_capture_index = captures_started();
4133 if (type == '!') { 4137 if (type == '!') {
(...skipping 222 matching lines...) Expand 10 before | Expand all | Expand 10 after
4356 start_position, 4360 start_position,
4357 is_expression); 4361 is_expression);
4358 return result; 4362 return result;
4359 } 4363 }
4360 4364
4361 4365
4362 #undef NEW 4366 #undef NEW
4363 4367
4364 4368
4365 } } // namespace v8::internal 4369 } } // namespace v8::internal
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698