Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(485)

Side by Side Diff: src/regexp/jsregexp.cc

Issue 1630633002: [regexp] correctly advance zero length matches for global/unicode. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@backrefbounds
Patch Set: fix test Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/regexp/jsregexp.h ('k') | src/regexp/jsregexp-inl.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/regexp/jsregexp.h" 5 #include "src/regexp/jsregexp.h"
6 6
7 #include "src/ast/ast.h" 7 #include "src/ast/ast.h"
8 #include "src/base/platform/platform.h" 8 #include "src/base/platform/platform.h"
9 #include "src/compilation-cache.h" 9 #include "src/compilation-cache.h"
10 #include "src/compiler.h" 10 #include "src/compiler.h"
(...skipping 620 matching lines...) Expand 10 before | Expand all | Expand 10 after
631 } 631 }
632 SetLastCaptureCount(array, capture_register_count); 632 SetLastCaptureCount(array, capture_register_count);
633 SetLastSubject(array, *subject); 633 SetLastSubject(array, *subject);
634 SetLastInput(array, *subject); 634 SetLastInput(array, *subject);
635 return last_match_info; 635 return last_match_info;
636 } 636 }
637 637
638 638
639 RegExpImpl::GlobalCache::GlobalCache(Handle<JSRegExp> regexp, 639 RegExpImpl::GlobalCache::GlobalCache(Handle<JSRegExp> regexp,
640 Handle<String> subject, 640 Handle<String> subject,
641 bool is_global,
642 Isolate* isolate) 641 Isolate* isolate)
643 : register_array_(NULL), 642 : register_array_(NULL),
644 register_array_size_(0), 643 register_array_size_(0),
645 regexp_(regexp), 644 regexp_(regexp),
646 subject_(subject) { 645 subject_(subject) {
647 #ifdef V8_INTERPRETED_REGEXP 646 #ifdef V8_INTERPRETED_REGEXP
648 bool interpreted = true; 647 bool interpreted = true;
649 #else 648 #else
650 bool interpreted = false; 649 bool interpreted = false;
651 #endif // V8_INTERPRETED_REGEXP 650 #endif // V8_INTERPRETED_REGEXP
652 651
653 if (regexp_->TypeTag() == JSRegExp::ATOM) { 652 if (regexp_->TypeTag() == JSRegExp::ATOM) {
654 static const int kAtomRegistersPerMatch = 2; 653 static const int kAtomRegistersPerMatch = 2;
655 registers_per_match_ = kAtomRegistersPerMatch; 654 registers_per_match_ = kAtomRegistersPerMatch;
656 // There is no distinction between interpreted and native for atom regexps. 655 // There is no distinction between interpreted and native for atom regexps.
657 interpreted = false; 656 interpreted = false;
658 } else { 657 } else {
659 registers_per_match_ = RegExpImpl::IrregexpPrepare(regexp_, subject_); 658 registers_per_match_ = RegExpImpl::IrregexpPrepare(regexp_, subject_);
660 if (registers_per_match_ < 0) { 659 if (registers_per_match_ < 0) {
661 num_matches_ = -1; // Signal exception. 660 num_matches_ = -1; // Signal exception.
662 return; 661 return;
663 } 662 }
664 } 663 }
665 664
666 if (is_global && !interpreted) { 665 DCHECK_NE(0, regexp->GetFlags() & JSRegExp::kGlobal);
666 if (!interpreted) {
667 register_array_size_ = 667 register_array_size_ =
668 Max(registers_per_match_, Isolate::kJSRegexpStaticOffsetsVectorSize); 668 Max(registers_per_match_, Isolate::kJSRegexpStaticOffsetsVectorSize);
669 max_matches_ = register_array_size_ / registers_per_match_; 669 max_matches_ = register_array_size_ / registers_per_match_;
670 } else { 670 } else {
671 // Global loop in interpreted regexp is not implemented. We choose 671 // Global loop in interpreted regexp is not implemented. We choose
672 // the size of the offsets vector so that it can only store one match. 672 // the size of the offsets vector so that it can only store one match.
673 register_array_size_ = registers_per_match_; 673 register_array_size_ = registers_per_match_;
674 max_matches_ = 1; 674 max_matches_ = 1;
675 } 675 }
676 676
677 if (register_array_size_ > Isolate::kJSRegexpStaticOffsetsVectorSize) { 677 if (register_array_size_ > Isolate::kJSRegexpStaticOffsetsVectorSize) {
678 register_array_ = NewArray<int32_t>(register_array_size_); 678 register_array_ = NewArray<int32_t>(register_array_size_);
679 } else { 679 } else {
680 register_array_ = isolate->jsregexp_static_offsets_vector(); 680 register_array_ = isolate->jsregexp_static_offsets_vector();
681 } 681 }
682 682
683 // Set state so that fetching the results the first time triggers a call 683 // Set state so that fetching the results the first time triggers a call
684 // to the compiled regexp. 684 // to the compiled regexp.
685 current_match_index_ = max_matches_ - 1; 685 current_match_index_ = max_matches_ - 1;
686 num_matches_ = max_matches_; 686 num_matches_ = max_matches_;
687 DCHECK(registers_per_match_ >= 2); // Each match has at least one capture. 687 DCHECK(registers_per_match_ >= 2); // Each match has at least one capture.
688 DCHECK_GE(register_array_size_, registers_per_match_); 688 DCHECK_GE(register_array_size_, registers_per_match_);
689 int32_t* last_match = 689 int32_t* last_match =
690 &register_array_[current_match_index_ * registers_per_match_]; 690 &register_array_[current_match_index_ * registers_per_match_];
691 last_match[0] = -1; 691 last_match[0] = -1;
692 last_match[1] = 0; 692 last_match[1] = 0;
693 } 693 }
694 694
695 int RegExpImpl::GlobalCache::AdvanceZeroLength(int last_index) {
696 if ((regexp_->GetFlags() & JSRegExp::kUnicode) != 0 &&
697 last_index + 1 < subject_->length() &&
698 unibrow::Utf16::IsLeadSurrogate(subject_->Get(last_index)) &&
699 unibrow::Utf16::IsTrailSurrogate(subject_->Get(last_index + 1))) {
700 // Advance over the surrogate pair.
701 return last_index + 2;
702 }
703 return last_index + 1;
704 }
695 705
696 // ------------------------------------------------------------------- 706 // -------------------------------------------------------------------
697 // Implementation of the Irregexp regular expression engine. 707 // Implementation of the Irregexp regular expression engine.
698 // 708 //
699 // The Irregexp regular expression engine is intended to be a complete 709 // The Irregexp regular expression engine is intended to be a complete
700 // implementation of ECMAScript regular expressions. It generates either 710 // implementation of ECMAScript regular expressions. It generates either
701 // bytecodes or native code. 711 // bytecodes or native code.
702 712
703 // The Irregexp regexp engine is structured in three steps. 713 // The Irregexp regexp engine is structured in three steps.
704 // 1) The parser generates an abstract syntax tree. See ast.cc. 714 // 1) The parser generates an abstract syntax tree. See ast.cc.
(...skipping 5911 matching lines...) Expand 10 before | Expand all | Expand 10 after
6616 RegExpEngine::CompilationResult RegExpEngine::Compile( 6626 RegExpEngine::CompilationResult RegExpEngine::Compile(
6617 Isolate* isolate, Zone* zone, RegExpCompileData* data, 6627 Isolate* isolate, Zone* zone, RegExpCompileData* data,
6618 JSRegExp::Flags flags, Handle<String> pattern, 6628 JSRegExp::Flags flags, Handle<String> pattern,
6619 Handle<String> sample_subject, bool is_one_byte) { 6629 Handle<String> sample_subject, bool is_one_byte) {
6620 if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) { 6630 if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) {
6621 return IrregexpRegExpTooBig(isolate); 6631 return IrregexpRegExpTooBig(isolate);
6622 } 6632 }
6623 bool ignore_case = flags & JSRegExp::kIgnoreCase; 6633 bool ignore_case = flags & JSRegExp::kIgnoreCase;
6624 bool is_sticky = flags & JSRegExp::kSticky; 6634 bool is_sticky = flags & JSRegExp::kSticky;
6625 bool is_global = flags & JSRegExp::kGlobal; 6635 bool is_global = flags & JSRegExp::kGlobal;
6636 bool is_unicode = flags & JSRegExp::kUnicode;
6626 RegExpCompiler compiler(isolate, zone, data->capture_count, flags, 6637 RegExpCompiler compiler(isolate, zone, data->capture_count, flags,
6627 is_one_byte); 6638 is_one_byte);
6628 6639
6629 if (compiler.optimize()) compiler.set_optimize(!TooMuchRegExpCode(pattern)); 6640 if (compiler.optimize()) compiler.set_optimize(!TooMuchRegExpCode(pattern));
6630 6641
6631 // Sample some characters from the middle of the string. 6642 // Sample some characters from the middle of the string.
6632 static const int kSampleSize = 128; 6643 static const int kSampleSize = 128;
6633 6644
6634 sample_subject = String::Flatten(sample_subject); 6645 sample_subject = String::Flatten(sample_subject);
6635 int chars_sampled = 0; 6646 int chars_sampled = 0;
(...skipping 99 matching lines...) Expand 10 before | Expand all | Expand 10 after
6735 // Inserted here, instead of in Assembler, because it depends on information 6746 // Inserted here, instead of in Assembler, because it depends on information
6736 // in the AST that isn't replicated in the Node structure. 6747 // in the AST that isn't replicated in the Node structure.
6737 static const int kMaxBacksearchLimit = 1024; 6748 static const int kMaxBacksearchLimit = 1024;
6738 if (is_end_anchored && 6749 if (is_end_anchored &&
6739 !is_start_anchored && 6750 !is_start_anchored &&
6740 max_length < kMaxBacksearchLimit) { 6751 max_length < kMaxBacksearchLimit) {
6741 macro_assembler.SetCurrentPositionFromEnd(max_length); 6752 macro_assembler.SetCurrentPositionFromEnd(max_length);
6742 } 6753 }
6743 6754
6744 if (is_global) { 6755 if (is_global) {
6745 macro_assembler.set_global_mode( 6756 RegExpMacroAssembler::GlobalMode mode = RegExpMacroAssembler::GLOBAL;
6746 (data->tree->min_match() > 0) 6757 if (data->tree->min_match() > 0) {
6747 ? RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK 6758 mode = RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK;
6748 : RegExpMacroAssembler::GLOBAL); 6759 } else if (is_unicode) {
6760 mode = RegExpMacroAssembler::GLOBAL_UNICODE;
6761 }
6762 macro_assembler.set_global_mode(mode);
6749 } 6763 }
6750 6764
6751 return compiler.Assemble(&macro_assembler, 6765 return compiler.Assemble(&macro_assembler,
6752 node, 6766 node,
6753 data->capture_count, 6767 data->capture_count,
6754 pattern); 6768 pattern);
6755 } 6769 }
6756 6770
6757 6771
6758 bool RegExpEngine::TooMuchRegExpCode(Handle<String> pattern) { 6772 bool RegExpEngine::TooMuchRegExpCode(Handle<String> pattern) {
(...skipping 102 matching lines...) Expand 10 before | Expand all | Expand 10 after
6861 6875
6862 6876
6863 void RegExpResultsCache::Clear(FixedArray* cache) { 6877 void RegExpResultsCache::Clear(FixedArray* cache) {
6864 for (int i = 0; i < kRegExpResultsCacheSize; i++) { 6878 for (int i = 0; i < kRegExpResultsCacheSize; i++) {
6865 cache->set(i, Smi::FromInt(0)); 6879 cache->set(i, Smi::FromInt(0));
6866 } 6880 }
6867 } 6881 }
6868 6882
6869 } // namespace internal 6883 } // namespace internal
6870 } // namespace v8 6884 } // namespace v8
OLDNEW
« no previous file with comments | « src/regexp/jsregexp.h ('k') | src/regexp/jsregexp-inl.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698