OLD | NEW |
---|---|
1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "src/v8.h" | 5 #include "src/v8.h" |
6 | 6 |
7 #include "src/ast.h" | 7 #include "src/ast.h" |
8 #include "src/base/platform/platform.h" | 8 #include "src/base/platform/platform.h" |
9 #include "src/compilation-cache.h" | 9 #include "src/compilation-cache.h" |
10 #include "src/compiler.h" | 10 #include "src/compiler.h" |
(...skipping 4799 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
4810 return false; | 4810 return false; |
4811 } | 4811 } |
4812 | 4812 |
4813 | 4813 |
4814 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, | 4814 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, |
4815 RegExpNode* on_success) { | 4815 RegExpNode* on_success) { |
4816 return new(compiler->zone()) TextNode(this, on_success); | 4816 return new(compiler->zone()) TextNode(this, on_success); |
4817 } | 4817 } |
4818 | 4818 |
4819 | 4819 |
4820 int CompareFirstChar(RegExpTree* const* a, RegExpTree* const* b) { | |
4821 RegExpAtom* atom1 = (*a)->AsAtom(); | |
4822 RegExpAtom* atom2 = (*b)->AsAtom(); | |
4823 uc16 character1 = atom1->data().at(0); | |
4824 uc16 character2 = atom2->data().at(0); | |
4825 if (character1 < character2) return -1; | |
4826 if (character1 > character2) return 1; | |
4827 return 0; | |
4828 } | |
4829 | |
4830 | |
4831 // We can stable sort runs of atoms of at least length 0, since the order | |
4832 // does not matter. | |
4833 // Returns true if any consecutive atoms were found. | |
4834 bool RegExpDisjunction::SortConsecutiveAtoms(RegExpCompiler* compiler) { | |
4835 ZoneList<RegExpTree*>* alternatives = this->alternatives(); | |
4836 int length = alternatives->length(); | |
4837 bool found_consecutive_atoms = false; | |
4838 for (int i = 0; i < length; i++) { | |
4839 while (i < length) { | |
4840 RegExpTree* alternative = alternatives->at(i); | |
4841 if (alternative->IsAtom() && alternative->AsAtom()->length() > 0) break; | |
4842 i++; | |
4843 } | |
4844 // i is length or it is the index of an atom. | |
4845 int first_atom = i; | |
4846 i++; | |
4847 while (i < length) { | |
4848 RegExpTree* alternative = alternatives->at(i); | |
4849 if (!alternative->IsAtom()) break; | |
4850 if (alternative->AsAtom()->length() == 0) break; | |
4851 i++; | |
4852 } | |
4853 // Sort atoms to get ones with common prefixes together. | |
4854 // This step is not valid if we are in a case-independent regexp, | |
4855 // because it would change /is|I/ to /I|is/, and order matters when | |
4856 // the regexp parts don't match only disjoint starting points. | |
4857 if (!compiler->ignore_case()) { | |
4858 alternatives->StableSort(CompareFirstChar, first_atom, i - first_atom); | |
4859 } | |
4860 if (i - first_atom > 1) found_consecutive_atoms = true; | |
4861 } | |
4862 return found_consecutive_atoms; | |
4863 } | |
4864 | |
4865 | |
4866 // Optimizes ab|ac|az to a(?:b|c|d). | |
4867 void RegExpDisjunction::RationalizeConsecutiveAtoms(RegExpCompiler* compiler) { | |
4868 Zone* zone = compiler->zone(); | |
4869 ZoneList<RegExpTree*>* alternatives = this->alternatives(); | |
4870 int length = alternatives->length(); | |
4871 | |
4872 int write_posn = 0; | |
4873 int i = 0; | |
4874 while (i < length) { | |
4875 RegExpTree* alternative = alternatives->at(i); | |
4876 if (!alternative->IsAtom()) { | |
4877 alternatives->at(write_posn++) = alternatives->at(i); | |
4878 i++; | |
4879 continue; | |
4880 } | |
4881 RegExpAtom* atom = alternative->AsAtom(); | |
4882 if (atom->length() < 1) { | |
4883 alternatives->at(write_posn++) = alternatives->at(i); | |
4884 i++; | |
4885 continue; | |
4886 } | |
4887 uc16 common_prefix = atom->data().at(0); | |
4888 int first_with_prefix = i; | |
4889 i++; | |
4890 while (i < length) { | |
4891 alternative = alternatives->at(i); | |
4892 if (!alternative->IsAtom()) break; | |
4893 atom = alternative->AsAtom(); | |
4894 if (atom->length() < 1) break; | |
4895 if (atom->data().at(0) != common_prefix) break; | |
4896 i++; | |
4897 } | |
4898 if (i > first_with_prefix + 2) { | |
4899 // Found worthwhile run of alternatives with common prefix. | |
4900 int run_length = i - first_with_prefix; | |
4901 ZoneList<uc16>* prefix_list = new (zone) ZoneList<uc16>(1, zone); | |
4902 prefix_list->Add(common_prefix, zone); | |
4903 RegExpAtom* prefix = new (zone) RegExpAtom(prefix_list->ToConstVector()); | |
4904 ZoneList<RegExpTree*>* pair = new (zone) ZoneList<RegExpTree*>(2, zone); | |
4905 pair->Add(prefix, zone); | |
4906 ZoneList<RegExpTree*>* suffixes = | |
4907 new (zone) ZoneList<RegExpTree*>(run_length, zone); | |
4908 for (int j = 0; j < run_length; j++) { | |
4909 RegExpAtom* old_atom = | |
4910 alternatives->at(j + first_with_prefix)->AsAtom(); | |
4911 int len = old_atom->length(); | |
4912 if (len == 0) { | |
4913 suffixes->Add(new (zone) RegExpEmpty(), zone); | |
4914 } else { | |
4915 ZoneList<uc16>* suffix_list = | |
4916 new (zone) ZoneList<uc16>(len - 1, zone); | |
4917 for (int k = 1; k < len; k++) | |
4918 suffix_list->Add(old_atom->data().at(k), zone); | |
4919 RegExpTree* suffix = | |
4920 new (zone) RegExpAtom(suffix_list->ToConstVector()); | |
Yang
2015/06/09 10:59:35
We could just use old_atom->data()->SubVector(1, l
| |
4921 suffixes->Add(suffix, zone); | |
4922 } | |
4923 } | |
4924 pair->Add(new (zone) RegExpDisjunction(suffixes), zone); | |
4925 alternatives->at(write_posn++) = new (zone) RegExpAlternative(pair); | |
4926 } else { | |
4927 // Just copy any non-worthwhile alternatives. | |
4928 for (int j = first_with_prefix; j < i; j++) { | |
4929 alternatives->at(write_posn++) = alternatives->at(j); | |
4930 } | |
4931 } | |
4932 } | |
4933 alternatives->Rewind(write_posn); // Trim end of array. | |
4934 } | |
4935 | |
4936 | |
4937 // Optimizes b|c|z to [bcz]. | |
4938 void RegExpDisjunction::FixSingleCharacterDisjunctions( | |
4939 RegExpCompiler* compiler) { | |
4940 Zone* zone = compiler->zone(); | |
4941 ZoneList<RegExpTree*>* alternatives = this->alternatives(); | |
4942 int length = alternatives->length(); | |
4943 | |
4944 int write_posn = 0; | |
4945 int i = 0; | |
4946 while (i < length) { | |
4947 RegExpTree* alternative = alternatives->at(i); | |
4948 if (!alternative->IsAtom()) { | |
4949 alternatives->at(write_posn++) = alternatives->at(i); | |
4950 i++; | |
4951 continue; | |
4952 } | |
4953 RegExpAtom* atom = alternative->AsAtom(); | |
4954 if (atom->length() != 1) { | |
4955 alternatives->at(write_posn++) = alternatives->at(i); | |
4956 i++; | |
4957 continue; | |
4958 } | |
4959 int first_in_run = i; | |
4960 i++; | |
4961 while (i < length) { | |
4962 alternative = alternatives->at(i); | |
4963 if (!alternative->IsAtom()) break; | |
4964 atom = alternative->AsAtom(); | |
4965 if (atom->length() != 1) break; | |
4966 i++; | |
4967 } | |
4968 if (i > first_in_run + 1) { | |
4969 // Found non-trivial run of single-character alternatives. | |
4970 int run_length = i - first_in_run; | |
4971 ZoneList<CharacterRange>* ranges = | |
4972 new (zone) ZoneList<CharacterRange>(2, zone); | |
4973 for (int j = 0; j < run_length; j++) { | |
4974 RegExpAtom* old_atom = alternatives->at(j + first_in_run)->AsAtom(); | |
4975 DCHECK_EQ(old_atom->length(), 1); | |
4976 ranges->Add(CharacterRange::Singleton(old_atom->data().at(0)), zone); | |
4977 } | |
4978 alternatives->at(write_posn++) = | |
4979 new (zone) RegExpCharacterClass(ranges, false); | |
4980 } else { | |
4981 // Just copy any trivial alternatives. | |
4982 for (int j = first_in_run; j < i; j++) { | |
4983 alternatives->at(write_posn++) = alternatives->at(j); | |
4984 } | |
4985 } | |
4986 } | |
4987 alternatives->Rewind(write_posn); // Trim end of array. | |
4988 } | |
4989 | |
4990 | |
4820 RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler, | 4991 RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler, |
4821 RegExpNode* on_success) { | 4992 RegExpNode* on_success) { |
4822 ZoneList<RegExpTree*>* alternatives = this->alternatives(); | 4993 ZoneList<RegExpTree*>* alternatives = this->alternatives(); |
4994 | |
4995 if (alternatives->length() > 2) { | |
4996 bool found_consecutive_atoms = SortConsecutiveAtoms(compiler); | |
4997 if (found_consecutive_atoms) RationalizeConsecutiveAtoms(compiler); | |
4998 FixSingleCharacterDisjunctions(compiler); | |
4999 if (alternatives->length() == 1) { | |
5000 return alternatives->at(0)->ToNode(compiler, on_success); | |
Yang
2015/06/09 10:59:35
I guess for something like aaaaaaaaaaaaaaab|aaaaaa
| |
5001 } | |
5002 } | |
5003 | |
4823 int length = alternatives->length(); | 5004 int length = alternatives->length(); |
5005 | |
4824 ChoiceNode* result = | 5006 ChoiceNode* result = |
4825 new(compiler->zone()) ChoiceNode(length, compiler->zone()); | 5007 new(compiler->zone()) ChoiceNode(length, compiler->zone()); |
4826 for (int i = 0; i < length; i++) { | 5008 for (int i = 0; i < length; i++) { |
4827 GuardedAlternative alternative(alternatives->at(i)->ToNode(compiler, | 5009 GuardedAlternative alternative(alternatives->at(i)->ToNode(compiler, |
4828 on_success)); | 5010 on_success)); |
4829 result->AddAlternative(alternative); | 5011 result->AddAlternative(alternative); |
4830 } | 5012 } |
4831 return result; | 5013 return result; |
4832 } | 5014 } |
4833 | 5015 |
(...skipping 1322 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
6156 bool too_much = pattern->length() > RegExpImpl::kRegExpTooLargeToOptimize; | 6338 bool too_much = pattern->length() > RegExpImpl::kRegExpTooLargeToOptimize; |
6157 if (heap->total_regexp_code_generated() > RegExpImpl::kRegExpCompiledLimit && | 6339 if (heap->total_regexp_code_generated() > RegExpImpl::kRegExpCompiledLimit && |
6158 heap->isolate()->memory_allocator()->SizeExecutable() > | 6340 heap->isolate()->memory_allocator()->SizeExecutable() > |
6159 RegExpImpl::kRegExpExecutableMemoryLimit) { | 6341 RegExpImpl::kRegExpExecutableMemoryLimit) { |
6160 too_much = true; | 6342 too_much = true; |
6161 } | 6343 } |
6162 return too_much; | 6344 return too_much; |
6163 } | 6345 } |
6164 } // namespace internal | 6346 } // namespace internal |
6165 } // namespace v8 | 6347 } // namespace v8 |
OLD | NEW |