Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1464)

Side by Side Diff: src/parser.cc

Issue 13784: * Fixed bug in handling of quantified look-aheads. (Closed)
Patch Set: Created 12 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/jsregexp.cc ('k') | test/cctest/test-regexp.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. 1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 509 matching lines...) Expand 10 before | Expand all | Expand 10 after
520 // it fails it will push back the characters read so the same characters 520 // it fails it will push back the characters read so the same characters
521 // can be reparsed. 521 // can be reparsed.
522 bool ParseBackReferenceIndex(int* index_out); 522 bool ParseBackReferenceIndex(int* index_out);
523 523
524 CharacterRange ParseClassAtom(uc16* char_class); 524 CharacterRange ParseClassAtom(uc16* char_class);
525 RegExpTree* ReportError(Vector<const char> message); 525 RegExpTree* ReportError(Vector<const char> message);
526 void Advance(); 526 void Advance();
527 void Advance(int dist); 527 void Advance(int dist);
528 void Reset(int pos); 528 void Reset(int pos);
529 529
530 bool HasCharacterEscapes(); 530 bool IsNonSimple();
Christian Plesner Hansen 2008/12/12 09:52:01 You should generally avoid negative method names.
531 531
532 int captures_started() { return captures_ == NULL ? 0 : captures_->length(); } 532 int captures_started() { return captures_ == NULL ? 0 : captures_->length(); }
533 int position() { return next_pos_ - 1; } 533 int position() { return next_pos_ - 1; }
534 bool failed() { return failed_; } 534 bool failed() { return failed_; }
535 535
536 static const uc32 kEndMarker = (1 << 21); 536 static const uc32 kEndMarker = (1 << 21);
537 private: 537 private:
538 uc32 current() { return current_; } 538 uc32 current() { return current_; }
539 bool has_more() { return has_more_; } 539 bool has_more() { return has_more_; }
540 bool has_next() { return next_pos_ < in()->length(); } 540 bool has_next() { return next_pos_ < in()->length(); }
541 uc32 Next(); 541 uc32 Next();
542 FlatStringReader* in() { return in_; } 542 FlatStringReader* in() { return in_; }
543 void ScanForCaptures(); 543 void ScanForCaptures();
544 bool CaptureAvailable(int index); 544 bool CaptureAvailable(int index);
545 uc32 current_; 545 uc32 current_;
546 bool has_more_; 546 bool has_more_;
547 bool multiline_; 547 bool multiline_;
548 int next_pos_; 548 int next_pos_;
549 FlatStringReader* in_; 549 FlatStringReader* in_;
550 Handle<String>* error_; 550 Handle<String>* error_;
551 bool has_character_escapes_; 551 bool non_simple_;
Christian Plesner Hansen 2008/12/12 09:52:01 Ditto.
552 ZoneList<RegExpCapture*>* captures_; 552 ZoneList<RegExpCapture*>* captures_;
553 bool is_scanned_for_captures_; 553 bool is_scanned_for_captures_;
554 // The capture count is only valid after we have scanned for captures. 554 // The capture count is only valid after we have scanned for captures.
555 int capture_count_; 555 int capture_count_;
556 bool failed_; 556 bool failed_;
557 }; 557 };
558 558
559 559
560 // A temporary scope stores information during parsing, just like 560 // A temporary scope stores information during parsing, just like
561 // a plain scope. However, temporary scopes are not kept around 561 // a plain scope. However, temporary scopes are not kept around
(...skipping 2933 matching lines...) Expand 10 before | Expand all | Expand 10 after
3495 3495
3496 RegExpParser::RegExpParser(FlatStringReader* in, 3496 RegExpParser::RegExpParser(FlatStringReader* in,
3497 Handle<String>* error, 3497 Handle<String>* error,
3498 bool multiline) 3498 bool multiline)
3499 : current_(kEndMarker), 3499 : current_(kEndMarker),
3500 has_more_(true), 3500 has_more_(true),
3501 multiline_(multiline), 3501 multiline_(multiline),
3502 next_pos_(0), 3502 next_pos_(0),
3503 in_(in), 3503 in_(in),
3504 error_(error), 3504 error_(error),
3505 has_character_escapes_(false), 3505 non_simple_(false),
3506 captures_(NULL), 3506 captures_(NULL),
3507 is_scanned_for_captures_(false), 3507 is_scanned_for_captures_(false),
3508 capture_count_(0), 3508 capture_count_(0),
3509 failed_(false) { 3509 failed_(false) {
3510 Advance(1); 3510 Advance(1);
3511 } 3511 }
3512 3512
3513 3513
3514 uc32 RegExpParser::Next() { 3514 uc32 RegExpParser::Next() {
3515 if (has_next()) { 3515 if (has_next()) {
(...skipping 27 matching lines...) Expand all
3543 Advance(); 3543 Advance();
3544 } 3544 }
3545 3545
3546 3546
3547 void RegExpParser::Advance(int dist) { 3547 void RegExpParser::Advance(int dist) {
3548 for (int i = 0; i < dist; i++) 3548 for (int i = 0; i < dist; i++)
3549 Advance(); 3549 Advance();
3550 } 3550 }
3551 3551
3552 3552
3553 // Reports whether the parsed string atoms contain any characters that were 3553 // Reports whether the pattern might be used as a literal search string.
3554 // escaped in the original pattern. If not, all atoms are proper substrings 3554 // Only use if the result of the parse is a single atom node.
3555 // of the original pattern. 3555 bool RegExpParser::IsNonSimple() {
Christian Plesner Hansen 2008/12/12 09:52:01 This should be a simple accessor, not a full camel
3556 bool RegExpParser::HasCharacterEscapes() { 3556 return non_simple_;
3557 return has_character_escapes_;
3558 } 3557 }
3559 3558
3560 RegExpTree* RegExpParser::ReportError(Vector<const char> message) { 3559 RegExpTree* RegExpParser::ReportError(Vector<const char> message) {
3561 failed_ = true; 3560 failed_ = true;
3562 *error_ = Factory::NewStringFromAscii(message, NOT_TENURED); 3561 *error_ = Factory::NewStringFromAscii(message, NOT_TENURED);
3563 // Zip to the end to make sure that no more input is read. 3562 // Zip to the end to make sure that no more input is read.
3564 current_ = kEndMarker; 3563 current_ = kEndMarker;
3565 next_pos_ = in()->length(); 3564 next_pos_ = in()->length();
3566 return NULL; 3565 return NULL;
3567 } 3566 }
(...skipping 194 matching lines...) Expand 10 before | Expand all | Expand 10 after
3762 builder.AddCharacter('u'); 3761 builder.AddCharacter('u');
3763 } 3762 }
3764 break; 3763 break;
3765 } 3764 }
3766 default: 3765 default:
3767 // Identity escape. 3766 // Identity escape.
3768 builder.AddCharacter(Next()); 3767 builder.AddCharacter(Next());
3769 Advance(2); 3768 Advance(2);
3770 break; 3769 break;
3771 } 3770 }
3772 has_character_escapes_ = true; 3771 non_simple_ = true;
3773 break; 3772 break;
3774 case '{': { 3773 case '{': {
3775 int dummy; 3774 int dummy;
3776 if (ParseIntervalQuantifier(&dummy, &dummy)) { 3775 if (ParseIntervalQuantifier(&dummy, &dummy)) {
3777 ReportError(CStrVector("Nothing to repeat") CHECK_FAILED); 3776 ReportError(CStrVector("Nothing to repeat") CHECK_FAILED);
3778 } 3777 }
3779 // fallthrough 3778 // fallthrough
3780 } 3779 }
3781 default: 3780 default:
3782 builder.AddCharacter(current()); 3781 builder.AddCharacter(current());
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
3815 continue; 3814 continue;
3816 } 3815 }
3817 default: 3816 default:
3818 continue; 3817 continue;
3819 } 3818 }
3820 bool is_greedy = true; 3819 bool is_greedy = true;
3821 if (current() == '?') { 3820 if (current() == '?') {
3822 is_greedy = false; 3821 is_greedy = false;
3823 Advance(); 3822 Advance();
3824 } 3823 }
3824 non_simple_ = true; // Adding quantifier might *remove* look-ahead.
3825 builder.AddQuantifierToAtom(min, max, is_greedy); 3825 builder.AddQuantifierToAtom(min, max, is_greedy);
3826 } 3826 }
3827 } 3827 }
3828 3828
3829 class SourceCharacter { 3829 class SourceCharacter {
3830 public: 3830 public:
3831 static bool Is(uc32 c) { 3831 static bool Is(uc32 c) {
3832 switch (c) { 3832 switch (c) {
3833 // case ']': case '}': 3833 // case ']': case '}':
3834 // In spidermonkey and jsc these are treated as source characters 3834 // In spidermonkey and jsc these are treated as source characters
(...skipping 472 matching lines...) Expand 10 before | Expand all | Expand 10 after
4307 // Make sure we have a stack guard. 4307 // Make sure we have a stack guard.
4308 StackGuard guard; 4308 StackGuard guard;
4309 RegExpParser parser(input, &result->error, multiline); 4309 RegExpParser parser(input, &result->error, multiline);
4310 result->tree = parser.ParsePattern(); 4310 result->tree = parser.ParsePattern();
4311 if (parser.failed()) { 4311 if (parser.failed()) {
4312 ASSERT(result->tree == NULL); 4312 ASSERT(result->tree == NULL);
4313 ASSERT(!result->error.is_null()); 4313 ASSERT(!result->error.is_null());
4314 } else { 4314 } else {
4315 ASSERT(result->tree != NULL); 4315 ASSERT(result->tree != NULL);
4316 ASSERT(result->error.is_null()); 4316 ASSERT(result->error.is_null());
4317 result->has_character_escapes = parser.HasCharacterEscapes(); 4317 result->non_simple = parser.IsNonSimple();
4318 result->capture_count = parser.captures_started(); 4318 result->capture_count = parser.captures_started();
4319 } 4319 }
4320 return !parser.failed(); 4320 return !parser.failed();
4321 } 4321 }
4322 4322
4323 4323
4324 FunctionLiteral* MakeAST(bool compile_in_global_context, 4324 FunctionLiteral* MakeAST(bool compile_in_global_context,
4325 Handle<Script> script, 4325 Handle<Script> script,
4326 v8::Extension* extension, 4326 v8::Extension* extension,
4327 ScriptDataImpl* pre_data) { 4327 ScriptDataImpl* pre_data) {
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after
4366 start_position, 4366 start_position,
4367 is_expression); 4367 is_expression);
4368 return result; 4368 return result;
4369 } 4369 }
4370 4370
4371 4371
4372 #undef NEW 4372 #undef NEW
4373 4373
4374 4374
4375 } } // namespace v8::internal 4375 } } // namespace v8::internal
OLDNEW
« no previous file with comments | « src/jsregexp.cc ('k') | test/cctest/test-regexp.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698