OLD | NEW |
1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "src/regexp/jsregexp.h" | 5 #include "src/regexp/jsregexp.h" |
6 | 6 |
7 #include "src/ast/ast.h" | 7 #include "src/ast/ast.h" |
8 #include "src/base/platform/platform.h" | 8 #include "src/base/platform/platform.h" |
9 #include "src/compilation-cache.h" | 9 #include "src/compilation-cache.h" |
10 #include "src/compiler.h" | 10 #include "src/compiler.h" |
(...skipping 615 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
626 } | 626 } |
627 SetLastCaptureCount(array, capture_register_count); | 627 SetLastCaptureCount(array, capture_register_count); |
628 SetLastSubject(array, *subject); | 628 SetLastSubject(array, *subject); |
629 SetLastInput(array, *subject); | 629 SetLastInput(array, *subject); |
630 return last_match_info; | 630 return last_match_info; |
631 } | 631 } |
632 | 632 |
633 | 633 |
634 RegExpImpl::GlobalCache::GlobalCache(Handle<JSRegExp> regexp, | 634 RegExpImpl::GlobalCache::GlobalCache(Handle<JSRegExp> regexp, |
635 Handle<String> subject, | 635 Handle<String> subject, |
636 bool is_global, | |
637 Isolate* isolate) | 636 Isolate* isolate) |
638 : register_array_(NULL), | 637 : register_array_(NULL), |
639 register_array_size_(0), | 638 register_array_size_(0), |
640 regexp_(regexp), | 639 regexp_(regexp), |
641 subject_(subject) { | 640 subject_(subject) { |
642 #ifdef V8_INTERPRETED_REGEXP | 641 #ifdef V8_INTERPRETED_REGEXP |
643 bool interpreted = true; | 642 bool interpreted = true; |
644 #else | 643 #else |
645 bool interpreted = false; | 644 bool interpreted = false; |
646 #endif // V8_INTERPRETED_REGEXP | 645 #endif // V8_INTERPRETED_REGEXP |
647 | 646 |
648 if (regexp_->TypeTag() == JSRegExp::ATOM) { | 647 if (regexp_->TypeTag() == JSRegExp::ATOM) { |
649 static const int kAtomRegistersPerMatch = 2; | 648 static const int kAtomRegistersPerMatch = 2; |
650 registers_per_match_ = kAtomRegistersPerMatch; | 649 registers_per_match_ = kAtomRegistersPerMatch; |
651 // There is no distinction between interpreted and native for atom regexps. | 650 // There is no distinction between interpreted and native for atom regexps. |
652 interpreted = false; | 651 interpreted = false; |
653 } else { | 652 } else { |
654 registers_per_match_ = RegExpImpl::IrregexpPrepare(regexp_, subject_); | 653 registers_per_match_ = RegExpImpl::IrregexpPrepare(regexp_, subject_); |
655 if (registers_per_match_ < 0) { | 654 if (registers_per_match_ < 0) { |
656 num_matches_ = -1; // Signal exception. | 655 num_matches_ = -1; // Signal exception. |
657 return; | 656 return; |
658 } | 657 } |
659 } | 658 } |
660 | 659 |
661 if (is_global && !interpreted) { | 660 DCHECK_NE(0, regexp->GetFlags() & JSRegExp::kGlobal); |
| 661 if (!interpreted) { |
662 register_array_size_ = | 662 register_array_size_ = |
663 Max(registers_per_match_, Isolate::kJSRegexpStaticOffsetsVectorSize); | 663 Max(registers_per_match_, Isolate::kJSRegexpStaticOffsetsVectorSize); |
664 max_matches_ = register_array_size_ / registers_per_match_; | 664 max_matches_ = register_array_size_ / registers_per_match_; |
665 } else { | 665 } else { |
666 // Global loop in interpreted regexp is not implemented. We choose | 666 // Global loop in interpreted regexp is not implemented. We choose |
667 // the size of the offsets vector so that it can only store one match. | 667 // the size of the offsets vector so that it can only store one match. |
668 register_array_size_ = registers_per_match_; | 668 register_array_size_ = registers_per_match_; |
669 max_matches_ = 1; | 669 max_matches_ = 1; |
670 } | 670 } |
671 | 671 |
672 if (register_array_size_ > Isolate::kJSRegexpStaticOffsetsVectorSize) { | 672 if (register_array_size_ > Isolate::kJSRegexpStaticOffsetsVectorSize) { |
673 register_array_ = NewArray<int32_t>(register_array_size_); | 673 register_array_ = NewArray<int32_t>(register_array_size_); |
674 } else { | 674 } else { |
675 register_array_ = isolate->jsregexp_static_offsets_vector(); | 675 register_array_ = isolate->jsregexp_static_offsets_vector(); |
676 } | 676 } |
677 | 677 |
678 // Set state so that fetching the results the first time triggers a call | 678 // Set state so that fetching the results the first time triggers a call |
679 // to the compiled regexp. | 679 // to the compiled regexp. |
680 current_match_index_ = max_matches_ - 1; | 680 current_match_index_ = max_matches_ - 1; |
681 num_matches_ = max_matches_; | 681 num_matches_ = max_matches_; |
682 DCHECK(registers_per_match_ >= 2); // Each match has at least one capture. | 682 DCHECK(registers_per_match_ >= 2); // Each match has at least one capture. |
683 DCHECK_GE(register_array_size_, registers_per_match_); | 683 DCHECK_GE(register_array_size_, registers_per_match_); |
684 int32_t* last_match = | 684 int32_t* last_match = |
685 &register_array_[current_match_index_ * registers_per_match_]; | 685 &register_array_[current_match_index_ * registers_per_match_]; |
686 last_match[0] = -1; | 686 last_match[0] = -1; |
687 last_match[1] = 0; | 687 last_match[1] = 0; |
688 } | 688 } |
689 | 689 |
| 690 int RegExpImpl::GlobalCache::AdvanceZeroLength(int last_index) { |
| 691 if ((regexp_->GetFlags() & JSRegExp::kUnicode) != 0 && |
| 692 last_index + 1 < subject_->length() && |
| 693 unibrow::Utf16::IsLeadSurrogate(subject_->Get(last_index)) && |
| 694 unibrow::Utf16::IsTrailSurrogate(subject_->Get(last_index + 1))) { |
| 695 // Advance over the surrogate pair. |
| 696 return last_index + 2; |
| 697 } |
| 698 return last_index + 1; |
| 699 } |
690 | 700 |
691 // ------------------------------------------------------------------- | 701 // ------------------------------------------------------------------- |
692 // Implementation of the Irregexp regular expression engine. | 702 // Implementation of the Irregexp regular expression engine. |
693 // | 703 // |
694 // The Irregexp regular expression engine is intended to be a complete | 704 // The Irregexp regular expression engine is intended to be a complete |
695 // implementation of ECMAScript regular expressions. It generates either | 705 // implementation of ECMAScript regular expressions. It generates either |
696 // bytecodes or native code. | 706 // bytecodes or native code. |
697 | 707 |
698 // The Irregexp regexp engine is structured in three steps. | 708 // The Irregexp regexp engine is structured in three steps. |
699 // 1) The parser generates an abstract syntax tree. See ast.cc. | 709 // 1) The parser generates an abstract syntax tree. See ast.cc. |
(...skipping 5819 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6519 RegExpEngine::CompilationResult RegExpEngine::Compile( | 6529 RegExpEngine::CompilationResult RegExpEngine::Compile( |
6520 Isolate* isolate, Zone* zone, RegExpCompileData* data, | 6530 Isolate* isolate, Zone* zone, RegExpCompileData* data, |
6521 JSRegExp::Flags flags, Handle<String> pattern, | 6531 JSRegExp::Flags flags, Handle<String> pattern, |
6522 Handle<String> sample_subject, bool is_one_byte) { | 6532 Handle<String> sample_subject, bool is_one_byte) { |
6523 if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) { | 6533 if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) { |
6524 return IrregexpRegExpTooBig(isolate); | 6534 return IrregexpRegExpTooBig(isolate); |
6525 } | 6535 } |
6526 bool ignore_case = flags & JSRegExp::kIgnoreCase; | 6536 bool ignore_case = flags & JSRegExp::kIgnoreCase; |
6527 bool is_sticky = flags & JSRegExp::kSticky; | 6537 bool is_sticky = flags & JSRegExp::kSticky; |
6528 bool is_global = flags & JSRegExp::kGlobal; | 6538 bool is_global = flags & JSRegExp::kGlobal; |
| 6539 bool is_unicode = flags & JSRegExp::kUnicode; |
6529 RegExpCompiler compiler(isolate, zone, data->capture_count, flags, | 6540 RegExpCompiler compiler(isolate, zone, data->capture_count, flags, |
6530 is_one_byte); | 6541 is_one_byte); |
6531 | 6542 |
6532 if (compiler.optimize()) compiler.set_optimize(!TooMuchRegExpCode(pattern)); | 6543 if (compiler.optimize()) compiler.set_optimize(!TooMuchRegExpCode(pattern)); |
6533 | 6544 |
6534 // Sample some characters from the middle of the string. | 6545 // Sample some characters from the middle of the string. |
6535 static const int kSampleSize = 128; | 6546 static const int kSampleSize = 128; |
6536 | 6547 |
6537 sample_subject = String::Flatten(sample_subject); | 6548 sample_subject = String::Flatten(sample_subject); |
6538 int chars_sampled = 0; | 6549 int chars_sampled = 0; |
(...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6636 // Inserted here, instead of in Assembler, because it depends on information | 6647 // Inserted here, instead of in Assembler, because it depends on information |
6637 // in the AST that isn't replicated in the Node structure. | 6648 // in the AST that isn't replicated in the Node structure. |
6638 static const int kMaxBacksearchLimit = 1024; | 6649 static const int kMaxBacksearchLimit = 1024; |
6639 if (is_end_anchored && | 6650 if (is_end_anchored && |
6640 !is_start_anchored && | 6651 !is_start_anchored && |
6641 max_length < kMaxBacksearchLimit) { | 6652 max_length < kMaxBacksearchLimit) { |
6642 macro_assembler.SetCurrentPositionFromEnd(max_length); | 6653 macro_assembler.SetCurrentPositionFromEnd(max_length); |
6643 } | 6654 } |
6644 | 6655 |
6645 if (is_global) { | 6656 if (is_global) { |
6646 macro_assembler.set_global_mode( | 6657 RegExpMacroAssembler::GlobalMode mode = RegExpMacroAssembler::GLOBAL; |
6647 (data->tree->min_match() > 0) | 6658 if (data->tree->min_match() > 0) { |
6648 ? RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK | 6659 mode = RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK; |
6649 : RegExpMacroAssembler::GLOBAL); | 6660 } else if (is_unicode) { |
| 6661 mode = RegExpMacroAssembler::GLOBAL_UNICODE; |
| 6662 } |
| 6663 macro_assembler.set_global_mode(mode); |
6650 } | 6664 } |
6651 | 6665 |
6652 return compiler.Assemble(&macro_assembler, | 6666 return compiler.Assemble(&macro_assembler, |
6653 node, | 6667 node, |
6654 data->capture_count, | 6668 data->capture_count, |
6655 pattern); | 6669 pattern); |
6656 } | 6670 } |
6657 | 6671 |
6658 | 6672 |
6659 bool RegExpEngine::TooMuchRegExpCode(Handle<String> pattern) { | 6673 bool RegExpEngine::TooMuchRegExpCode(Handle<String> pattern) { |
(...skipping 102 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6762 | 6776 |
6763 | 6777 |
6764 void RegExpResultsCache::Clear(FixedArray* cache) { | 6778 void RegExpResultsCache::Clear(FixedArray* cache) { |
6765 for (int i = 0; i < kRegExpResultsCacheSize; i++) { | 6779 for (int i = 0; i < kRegExpResultsCacheSize; i++) { |
6766 cache->set(i, Smi::FromInt(0)); | 6780 cache->set(i, Smi::FromInt(0)); |
6767 } | 6781 } |
6768 } | 6782 } |
6769 | 6783 |
6770 } // namespace internal | 6784 } // namespace internal |
6771 } // namespace v8 | 6785 } // namespace v8 |
OLD | NEW |