Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(63)

Side by Side Diff: src/regexp/jsregexp.cc

Issue 1630633002: [regexp] correctly advance zero length matches for global/unicode. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@backrefbounds
Patch Set: Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD NEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/regexp/jsregexp.h" 5 #include "src/regexp/jsregexp.h"
6 6
7 #include "src/ast/ast.h" 7 #include "src/ast/ast.h"
8 #include "src/base/platform/platform.h" 8 #include "src/base/platform/platform.h"
9 #include "src/compilation-cache.h" 9 #include "src/compilation-cache.h"
10 #include "src/compiler.h" 10 #include "src/compiler.h"
(...skipping 615 matching lines...) Expand 10 before | Expand all | Expand 10 after
626 } 626 }
627 SetLastCaptureCount(array, capture_register_count); 627 SetLastCaptureCount(array, capture_register_count);
628 SetLastSubject(array, *subject); 628 SetLastSubject(array, *subject);
629 SetLastInput(array, *subject); 629 SetLastInput(array, *subject);
630 return last_match_info; 630 return last_match_info;
631 } 631 }
632 632
633 633
634 RegExpImpl::GlobalCache::GlobalCache(Handle<JSRegExp> regexp, 634 RegExpImpl::GlobalCache::GlobalCache(Handle<JSRegExp> regexp,
635 Handle<String> subject, 635 Handle<String> subject,
636 bool is_global,
637 Isolate* isolate) 636 Isolate* isolate)
638 : register_array_(NULL), 637 : register_array_(NULL),
639 register_array_size_(0), 638 register_array_size_(0),
640 regexp_(regexp), 639 regexp_(regexp),
641 subject_(subject) { 640 subject_(subject) {
642 #ifdef V8_INTERPRETED_REGEXP 641 #ifdef V8_INTERPRETED_REGEXP
643 bool interpreted = true; 642 bool interpreted = true;
644 #else 643 #else
645 bool interpreted = false; 644 bool interpreted = false;
646 #endif // V8_INTERPRETED_REGEXP 645 #endif // V8_INTERPRETED_REGEXP
647 646
648 if (regexp_->TypeTag() == JSRegExp::ATOM) { 647 if (regexp_->TypeTag() == JSRegExp::ATOM) {
649 static const int kAtomRegistersPerMatch = 2; 648 static const int kAtomRegistersPerMatch = 2;
650 registers_per_match_ = kAtomRegistersPerMatch; 649 registers_per_match_ = kAtomRegistersPerMatch;
651 // There is no distinction between interpreted and native for atom regexps. 650 // There is no distinction between interpreted and native for atom regexps.
652 interpreted = false; 651 interpreted = false;
653 } else { 652 } else {
654 registers_per_match_ = RegExpImpl::IrregexpPrepare(regexp_, subject_); 653 registers_per_match_ = RegExpImpl::IrregexpPrepare(regexp_, subject_);
655 if (registers_per_match_ < 0) { 654 if (registers_per_match_ < 0) {
656 num_matches_ = -1; // Signal exception. 655 num_matches_ = -1; // Signal exception.
657 return; 656 return;
658 } 657 }
659 } 658 }
660 659
661 if (is_global && !interpreted) { 660 DCHECK_NE(0, regexp->GetFlags() & JSRegExp::kGlobal);
661 if (!interpreted) {
662 register_array_size_ = 662 register_array_size_ =
663 Max(registers_per_match_, Isolate::kJSRegexpStaticOffsetsVectorSize); 663 Max(registers_per_match_, Isolate::kJSRegexpStaticOffsetsVectorSize);
664 max_matches_ = register_array_size_ / registers_per_match_; 664 max_matches_ = register_array_size_ / registers_per_match_;
665 } else { 665 } else {
666 // Global loop in interpreted regexp is not implemented. We choose 666 // Global loop in interpreted regexp is not implemented. We choose
667 // the size of the offsets vector so that it can only store one match. 667 // the size of the offsets vector so that it can only store one match.
668 register_array_size_ = registers_per_match_; 668 register_array_size_ = registers_per_match_;
669 max_matches_ = 1; 669 max_matches_ = 1;
670 } 670 }
671 671
672 if (register_array_size_ > Isolate::kJSRegexpStaticOffsetsVectorSize) { 672 if (register_array_size_ > Isolate::kJSRegexpStaticOffsetsVectorSize) {
673 register_array_ = NewArray<int32_t>(register_array_size_); 673 register_array_ = NewArray<int32_t>(register_array_size_);
674 } else { 674 } else {
675 register_array_ = isolate->jsregexp_static_offsets_vector(); 675 register_array_ = isolate->jsregexp_static_offsets_vector();
676 } 676 }
677 677
678 // Set state so that fetching the results the first time triggers a call 678 // Set state so that fetching the results the first time triggers a call
679 // to the compiled regexp. 679 // to the compiled regexp.
680 current_match_index_ = max_matches_ - 1; 680 current_match_index_ = max_matches_ - 1;
681 num_matches_ = max_matches_; 681 num_matches_ = max_matches_;
682 DCHECK(registers_per_match_ >= 2); // Each match has at least one capture. 682 DCHECK(registers_per_match_ >= 2); // Each match has at least one capture.
683 DCHECK_GE(register_array_size_, registers_per_match_); 683 DCHECK_GE(register_array_size_, registers_per_match_);
684 int32_t* last_match = 684 int32_t* last_match =
685 &register_array_[current_match_index_ * registers_per_match_]; 685 &register_array_[current_match_index_ * registers_per_match_];
686 last_match[0] = -1; 686 last_match[0] = -1;
687 last_match[1] = 0; 687 last_match[1] = 0;
688 } 688 }
689 689
690 int RegExpImpl::GlobalCache::AdvanceZeroLength(int last_index) {
691 if ((regexp_->GetFlags() & JSRegExp::kUnicode) != 0 &&
692 last_index + 1 < subject_->length() &&
693 unibrow::Utf16::IsLeadSurrogate(subject_->Get(last_index)) &&
694 unibrow::Utf16::IsTrailSurrogate(subject_->Get(last_index + 1))) {
695 // Advance over the surrogate pair.
696 return last_index + 2;
697 }
698 return last_index + 1;
699 }
690 700
691 // ------------------------------------------------------------------- 701 // -------------------------------------------------------------------
692 // Implementation of the Irregexp regular expression engine. 702 // Implementation of the Irregexp regular expression engine.
693 // 703 //
694 // The Irregexp regular expression engine is intended to be a complete 704 // The Irregexp regular expression engine is intended to be a complete
695 // implementation of ECMAScript regular expressions. It generates either 705 // implementation of ECMAScript regular expressions. It generates either
696 // bytecodes or native code. 706 // bytecodes or native code.
697 707
698 // The Irregexp regexp engine is structured in three steps. 708 // The Irregexp regexp engine is structured in three steps.
699 // 1) The parser generates an abstract syntax tree. See ast.cc. 709 // 1) The parser generates an abstract syntax tree. See ast.cc.
(...skipping 5819 matching lines...) Expand 10 before | Expand all | Expand 10 after
6519 RegExpEngine::CompilationResult RegExpEngine::Compile( 6529 RegExpEngine::CompilationResult RegExpEngine::Compile(
6520 Isolate* isolate, Zone* zone, RegExpCompileData* data, 6530 Isolate* isolate, Zone* zone, RegExpCompileData* data,
6521 JSRegExp::Flags flags, Handle<String> pattern, 6531 JSRegExp::Flags flags, Handle<String> pattern,
6522 Handle<String> sample_subject, bool is_one_byte) { 6532 Handle<String> sample_subject, bool is_one_byte) {
6523 if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) { 6533 if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) {
6524 return IrregexpRegExpTooBig(isolate); 6534 return IrregexpRegExpTooBig(isolate);
6525 } 6535 }
6526 bool ignore_case = flags & JSRegExp::kIgnoreCase; 6536 bool ignore_case = flags & JSRegExp::kIgnoreCase;
6527 bool is_sticky = flags & JSRegExp::kSticky; 6537 bool is_sticky = flags & JSRegExp::kSticky;
6528 bool is_global = flags & JSRegExp::kGlobal; 6538 bool is_global = flags & JSRegExp::kGlobal;
6539 bool is_unicode = flags & JSRegExp::kUnicode;
6529 RegExpCompiler compiler(isolate, zone, data->capture_count, flags, 6540 RegExpCompiler compiler(isolate, zone, data->capture_count, flags,
6530 is_one_byte); 6541 is_one_byte);
6531 6542
6532 if (compiler.optimize()) compiler.set_optimize(!TooMuchRegExpCode(pattern)); 6543 if (compiler.optimize()) compiler.set_optimize(!TooMuchRegExpCode(pattern));
6533 6544
6534 // Sample some characters from the middle of the string. 6545 // Sample some characters from the middle of the string.
6535 static const int kSampleSize = 128; 6546 static const int kSampleSize = 128;
6536 6547
6537 sample_subject = String::Flatten(sample_subject); 6548 sample_subject = String::Flatten(sample_subject);
6538 int chars_sampled = 0; 6549 int chars_sampled = 0;
(...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after
6636 // Inserted here, instead of in Assembler, because it depends on information 6647 // Inserted here, instead of in Assembler, because it depends on information
6637 // in the AST that isn't replicated in the Node structure. 6648 // in the AST that isn't replicated in the Node structure.
6638 static const int kMaxBacksearchLimit = 1024; 6649 static const int kMaxBacksearchLimit = 1024;
6639 if (is_end_anchored && 6650 if (is_end_anchored &&
6640 !is_start_anchored && 6651 !is_start_anchored &&
6641 max_length < kMaxBacksearchLimit) { 6652 max_length < kMaxBacksearchLimit) {
6642 macro_assembler.SetCurrentPositionFromEnd(max_length); 6653 macro_assembler.SetCurrentPositionFromEnd(max_length);
6643 } 6654 }
6644 6655
6645 if (is_global) { 6656 if (is_global) {
6646 macro_assembler.set_global_mode( 6657 RegExpMacroAssembler::GlobalMode mode = RegExpMacroAssembler::GLOBAL;
6647 (data->tree->min_match() > 0) 6658 if (data->tree->min_match() > 0) {
6648 ? RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK 6659 mode = RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK;
6649 : RegExpMacroAssembler::GLOBAL); 6660 } else if (is_unicode) {
6661 mode = RegExpMacroAssembler::GLOBAL_UNICODE;
6662 }
6663 macro_assembler.set_global_mode(mode);
6650 } 6664 }
6651 6665
6652 return compiler.Assemble(&macro_assembler, 6666 return compiler.Assemble(&macro_assembler,
6653 node, 6667 node,
6654 data->capture_count, 6668 data->capture_count,
6655 pattern); 6669 pattern);
6656 } 6670 }
6657 6671
6658 6672
6659 bool RegExpEngine::TooMuchRegExpCode(Handle<String> pattern) { 6673 bool RegExpEngine::TooMuchRegExpCode(Handle<String> pattern) {
(...skipping 102 matching lines...) Expand 10 before | Expand all | Expand 10 after
6762 6776
6763 6777
6764 void RegExpResultsCache::Clear(FixedArray* cache) { 6778 void RegExpResultsCache::Clear(FixedArray* cache) {
6765 for (int i = 0; i < kRegExpResultsCacheSize; i++) { 6779 for (int i = 0; i < kRegExpResultsCacheSize; i++) {
6766 cache->set(i, Smi::FromInt(0)); 6780 cache->set(i, Smi::FromInt(0));
6767 } 6781 }
6768 } 6782 }
6769 6783
6770 } // namespace internal 6784 } // namespace internal
6771 } // namespace v8 6785 } // namespace v8
OLD NEW

Powered by Google App Engine
This is Rietveld 408576698