| OLD | NEW |
| 1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "src/regexp/jsregexp.h" | 5 #include "src/regexp/jsregexp.h" |
| 6 | 6 |
| 7 #include "src/ast/ast.h" | 7 #include "src/ast/ast.h" |
| 8 #include "src/base/platform/platform.h" | 8 #include "src/base/platform/platform.h" |
| 9 #include "src/compilation-cache.h" | 9 #include "src/compilation-cache.h" |
| 10 #include "src/compiler.h" | 10 #include "src/compiler.h" |
| (...skipping 620 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 631 } | 631 } |
| 632 SetLastCaptureCount(array, capture_register_count); | 632 SetLastCaptureCount(array, capture_register_count); |
| 633 SetLastSubject(array, *subject); | 633 SetLastSubject(array, *subject); |
| 634 SetLastInput(array, *subject); | 634 SetLastInput(array, *subject); |
| 635 return last_match_info; | 635 return last_match_info; |
| 636 } | 636 } |
| 637 | 637 |
| 638 | 638 |
| 639 RegExpImpl::GlobalCache::GlobalCache(Handle<JSRegExp> regexp, | 639 RegExpImpl::GlobalCache::GlobalCache(Handle<JSRegExp> regexp, |
| 640 Handle<String> subject, | 640 Handle<String> subject, |
| 641 bool is_global, | |
| 642 Isolate* isolate) | 641 Isolate* isolate) |
| 643 : register_array_(NULL), | 642 : register_array_(NULL), |
| 644 register_array_size_(0), | 643 register_array_size_(0), |
| 645 regexp_(regexp), | 644 regexp_(regexp), |
| 646 subject_(subject) { | 645 subject_(subject) { |
| 647 #ifdef V8_INTERPRETED_REGEXP | 646 #ifdef V8_INTERPRETED_REGEXP |
| 648 bool interpreted = true; | 647 bool interpreted = true; |
| 649 #else | 648 #else |
| 650 bool interpreted = false; | 649 bool interpreted = false; |
| 651 #endif // V8_INTERPRETED_REGEXP | 650 #endif // V8_INTERPRETED_REGEXP |
| 652 | 651 |
| 653 if (regexp_->TypeTag() == JSRegExp::ATOM) { | 652 if (regexp_->TypeTag() == JSRegExp::ATOM) { |
| 654 static const int kAtomRegistersPerMatch = 2; | 653 static const int kAtomRegistersPerMatch = 2; |
| 655 registers_per_match_ = kAtomRegistersPerMatch; | 654 registers_per_match_ = kAtomRegistersPerMatch; |
| 656 // There is no distinction between interpreted and native for atom regexps. | 655 // There is no distinction between interpreted and native for atom regexps. |
| 657 interpreted = false; | 656 interpreted = false; |
| 658 } else { | 657 } else { |
| 659 registers_per_match_ = RegExpImpl::IrregexpPrepare(regexp_, subject_); | 658 registers_per_match_ = RegExpImpl::IrregexpPrepare(regexp_, subject_); |
| 660 if (registers_per_match_ < 0) { | 659 if (registers_per_match_ < 0) { |
| 661 num_matches_ = -1; // Signal exception. | 660 num_matches_ = -1; // Signal exception. |
| 662 return; | 661 return; |
| 663 } | 662 } |
| 664 } | 663 } |
| 665 | 664 |
| 666 if (is_global && !interpreted) { | 665 DCHECK_NE(0, regexp->GetFlags() & JSRegExp::kGlobal); |
| 666 if (!interpreted) { |
| 667 register_array_size_ = | 667 register_array_size_ = |
| 668 Max(registers_per_match_, Isolate::kJSRegexpStaticOffsetsVectorSize); | 668 Max(registers_per_match_, Isolate::kJSRegexpStaticOffsetsVectorSize); |
| 669 max_matches_ = register_array_size_ / registers_per_match_; | 669 max_matches_ = register_array_size_ / registers_per_match_; |
| 670 } else { | 670 } else { |
| 671 // Global loop in interpreted regexp is not implemented. We choose | 671 // Global loop in interpreted regexp is not implemented. We choose |
| 672 // the size of the offsets vector so that it can only store one match. | 672 // the size of the offsets vector so that it can only store one match. |
| 673 register_array_size_ = registers_per_match_; | 673 register_array_size_ = registers_per_match_; |
| 674 max_matches_ = 1; | 674 max_matches_ = 1; |
| 675 } | 675 } |
| 676 | 676 |
| 677 if (register_array_size_ > Isolate::kJSRegexpStaticOffsetsVectorSize) { | 677 if (register_array_size_ > Isolate::kJSRegexpStaticOffsetsVectorSize) { |
| 678 register_array_ = NewArray<int32_t>(register_array_size_); | 678 register_array_ = NewArray<int32_t>(register_array_size_); |
| 679 } else { | 679 } else { |
| 680 register_array_ = isolate->jsregexp_static_offsets_vector(); | 680 register_array_ = isolate->jsregexp_static_offsets_vector(); |
| 681 } | 681 } |
| 682 | 682 |
| 683 // Set state so that fetching the results the first time triggers a call | 683 // Set state so that fetching the results the first time triggers a call |
| 684 // to the compiled regexp. | 684 // to the compiled regexp. |
| 685 current_match_index_ = max_matches_ - 1; | 685 current_match_index_ = max_matches_ - 1; |
| 686 num_matches_ = max_matches_; | 686 num_matches_ = max_matches_; |
| 687 DCHECK(registers_per_match_ >= 2); // Each match has at least one capture. | 687 DCHECK(registers_per_match_ >= 2); // Each match has at least one capture. |
| 688 DCHECK_GE(register_array_size_, registers_per_match_); | 688 DCHECK_GE(register_array_size_, registers_per_match_); |
| 689 int32_t* last_match = | 689 int32_t* last_match = |
| 690 &register_array_[current_match_index_ * registers_per_match_]; | 690 &register_array_[current_match_index_ * registers_per_match_]; |
| 691 last_match[0] = -1; | 691 last_match[0] = -1; |
| 692 last_match[1] = 0; | 692 last_match[1] = 0; |
| 693 } | 693 } |
| 694 | 694 |
| 695 int RegExpImpl::GlobalCache::AdvanceZeroLength(int last_index) { |
| 696 if ((regexp_->GetFlags() & JSRegExp::kUnicode) != 0 && |
| 697 last_index + 1 < subject_->length() && |
| 698 unibrow::Utf16::IsLeadSurrogate(subject_->Get(last_index)) && |
| 699 unibrow::Utf16::IsTrailSurrogate(subject_->Get(last_index + 1))) { |
| 700 // Advance over the surrogate pair. |
| 701 return last_index + 2; |
| 702 } |
| 703 return last_index + 1; |
| 704 } |
| 695 | 705 |
| 696 // ------------------------------------------------------------------- | 706 // ------------------------------------------------------------------- |
| 697 // Implementation of the Irregexp regular expression engine. | 707 // Implementation of the Irregexp regular expression engine. |
| 698 // | 708 // |
| 699 // The Irregexp regular expression engine is intended to be a complete | 709 // The Irregexp regular expression engine is intended to be a complete |
| 700 // implementation of ECMAScript regular expressions. It generates either | 710 // implementation of ECMAScript regular expressions. It generates either |
| 701 // bytecodes or native code. | 711 // bytecodes or native code. |
| 702 | 712 |
| 703 // The Irregexp regexp engine is structured in three steps. | 713 // The Irregexp regexp engine is structured in three steps. |
| 704 // 1) The parser generates an abstract syntax tree. See ast.cc. | 714 // 1) The parser generates an abstract syntax tree. See ast.cc. |
| (...skipping 5911 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 6616 RegExpEngine::CompilationResult RegExpEngine::Compile( | 6626 RegExpEngine::CompilationResult RegExpEngine::Compile( |
| 6617 Isolate* isolate, Zone* zone, RegExpCompileData* data, | 6627 Isolate* isolate, Zone* zone, RegExpCompileData* data, |
| 6618 JSRegExp::Flags flags, Handle<String> pattern, | 6628 JSRegExp::Flags flags, Handle<String> pattern, |
| 6619 Handle<String> sample_subject, bool is_one_byte) { | 6629 Handle<String> sample_subject, bool is_one_byte) { |
| 6620 if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) { | 6630 if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) { |
| 6621 return IrregexpRegExpTooBig(isolate); | 6631 return IrregexpRegExpTooBig(isolate); |
| 6622 } | 6632 } |
| 6623 bool ignore_case = flags & JSRegExp::kIgnoreCase; | 6633 bool ignore_case = flags & JSRegExp::kIgnoreCase; |
| 6624 bool is_sticky = flags & JSRegExp::kSticky; | 6634 bool is_sticky = flags & JSRegExp::kSticky; |
| 6625 bool is_global = flags & JSRegExp::kGlobal; | 6635 bool is_global = flags & JSRegExp::kGlobal; |
| 6636 bool is_unicode = flags & JSRegExp::kUnicode; |
| 6626 RegExpCompiler compiler(isolate, zone, data->capture_count, flags, | 6637 RegExpCompiler compiler(isolate, zone, data->capture_count, flags, |
| 6627 is_one_byte); | 6638 is_one_byte); |
| 6628 | 6639 |
| 6629 if (compiler.optimize()) compiler.set_optimize(!TooMuchRegExpCode(pattern)); | 6640 if (compiler.optimize()) compiler.set_optimize(!TooMuchRegExpCode(pattern)); |
| 6630 | 6641 |
| 6631 // Sample some characters from the middle of the string. | 6642 // Sample some characters from the middle of the string. |
| 6632 static const int kSampleSize = 128; | 6643 static const int kSampleSize = 128; |
| 6633 | 6644 |
| 6634 sample_subject = String::Flatten(sample_subject); | 6645 sample_subject = String::Flatten(sample_subject); |
| 6635 int chars_sampled = 0; | 6646 int chars_sampled = 0; |
| (...skipping 99 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 6735 // Inserted here, instead of in Assembler, because it depends on information | 6746 // Inserted here, instead of in Assembler, because it depends on information |
| 6736 // in the AST that isn't replicated in the Node structure. | 6747 // in the AST that isn't replicated in the Node structure. |
| 6737 static const int kMaxBacksearchLimit = 1024; | 6748 static const int kMaxBacksearchLimit = 1024; |
| 6738 if (is_end_anchored && | 6749 if (is_end_anchored && |
| 6739 !is_start_anchored && | 6750 !is_start_anchored && |
| 6740 max_length < kMaxBacksearchLimit) { | 6751 max_length < kMaxBacksearchLimit) { |
| 6741 macro_assembler.SetCurrentPositionFromEnd(max_length); | 6752 macro_assembler.SetCurrentPositionFromEnd(max_length); |
| 6742 } | 6753 } |
| 6743 | 6754 |
| 6744 if (is_global) { | 6755 if (is_global) { |
| 6745 macro_assembler.set_global_mode( | 6756 RegExpMacroAssembler::GlobalMode mode = RegExpMacroAssembler::GLOBAL; |
| 6746 (data->tree->min_match() > 0) | 6757 if (data->tree->min_match() > 0) { |
| 6747 ? RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK | 6758 mode = RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK; |
| 6748 : RegExpMacroAssembler::GLOBAL); | 6759 } else if (is_unicode) { |
| 6760 mode = RegExpMacroAssembler::GLOBAL_UNICODE; |
| 6761 } |
| 6762 macro_assembler.set_global_mode(mode); |
| 6749 } | 6763 } |
| 6750 | 6764 |
| 6751 return compiler.Assemble(&macro_assembler, | 6765 return compiler.Assemble(&macro_assembler, |
| 6752 node, | 6766 node, |
| 6753 data->capture_count, | 6767 data->capture_count, |
| 6754 pattern); | 6768 pattern); |
| 6755 } | 6769 } |
| 6756 | 6770 |
| 6757 | 6771 |
| 6758 bool RegExpEngine::TooMuchRegExpCode(Handle<String> pattern) { | 6772 bool RegExpEngine::TooMuchRegExpCode(Handle<String> pattern) { |
| (...skipping 102 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 6861 | 6875 |
| 6862 | 6876 |
| 6863 void RegExpResultsCache::Clear(FixedArray* cache) { | 6877 void RegExpResultsCache::Clear(FixedArray* cache) { |
| 6864 for (int i = 0; i < kRegExpResultsCacheSize; i++) { | 6878 for (int i = 0; i < kRegExpResultsCacheSize; i++) { |
| 6865 cache->set(i, Smi::FromInt(0)); | 6879 cache->set(i, Smi::FromInt(0)); |
| 6866 } | 6880 } |
| 6867 } | 6881 } |
| 6868 | 6882 |
| 6869 } // namespace internal | 6883 } // namespace internal |
| 6870 } // namespace v8 | 6884 } // namespace v8 |
| OLD | NEW |