Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "src/v8.h" | 5 #include "src/v8.h" |
| 6 | 6 |
| 7 #include "src/ast.h" | 7 #include "src/ast.h" |
| 8 #include "src/base/platform/platform.h" | 8 #include "src/base/platform/platform.h" |
| 9 #include "src/compilation-cache.h" | 9 #include "src/compilation-cache.h" |
| 10 #include "src/compiler.h" | 10 #include "src/compiler.h" |
| (...skipping 4799 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 4810 return false; | 4810 return false; |
| 4811 } | 4811 } |
| 4812 | 4812 |
| 4813 | 4813 |
| 4814 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, | 4814 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, |
| 4815 RegExpNode* on_success) { | 4815 RegExpNode* on_success) { |
| 4816 return new(compiler->zone()) TextNode(this, on_success); | 4816 return new(compiler->zone()) TextNode(this, on_success); |
| 4817 } | 4817 } |
| 4818 | 4818 |
| 4819 | 4819 |
| 4820 int CompareFirstChar(RegExpTree* const* a, RegExpTree* const* b) { | |
| 4821 RegExpAtom* atom1 = (*a)->AsAtom(); | |
| 4822 RegExpAtom* atom2 = (*b)->AsAtom(); | |
| 4823 uc16 character1 = atom1->data().at(0); | |
| 4824 uc16 character2 = atom2->data().at(0); | |
| 4825 if (character1 < character2) return -1; | |
| 4826 if (character1 > character2) return 1; | |
| 4827 return 0; | |
| 4828 } | |
| 4829 | |
| 4830 | |
| 4831 // We can stable sort runs of atoms, since the order does not matter if they | |
| 4832 // start with different characters. | |
| 4833 // Returns true if any consecutive atoms were found. | |
| 4834 bool RegExpDisjunction::SortConsecutiveAtoms(RegExpCompiler* compiler) { | |
| 4835 ZoneList<RegExpTree*>* alternatives = this->alternatives(); | |
| 4836 int length = alternatives->length(); | |
| 4837 bool found_consecutive_atoms = false; | |
| 4838 for (int i = 0; i < length; i++) { | |
| 4839 while (i < length) { | |
| 4840 RegExpTree* alternative = alternatives->at(i); | |
| 4841 if (alternative->IsAtom()) break; | |
| 4842 i++; | |
| 4843 } | |
| 4844 // i is length or it is the index of an atom. | |
| 4845 if (i == length) break; | |
|
Erik Corry Chromium.org
2015/06/09 20:27:11
This is the fix.
| |
| 4846 int first_atom = i; | |
| 4847 i++; | |
| 4848 while (i < length) { | |
| 4849 RegExpTree* alternative = alternatives->at(i); | |
| 4850 if (!alternative->IsAtom()) break; | |
| 4851 i++; | |
| 4852 } | |
| 4853 // Sort atoms to get ones with common prefixes together. | |
| 4854 // This step is not valid if we are in a case-independent regexp, | |
| 4855 // because it would change /is|I/ to /I|is/, and order matters when | |
| 4856 // the regexp parts don't match only disjoint starting points. To fix | |
| 4857 // this would need a version of CompareFirstChar that uses case- | |
| 4858 // independent character classes for comparison. | |
| 4859 if (!compiler->ignore_case()) { | |
| 4860 DCHECK_LT(first_atom, alternatives->length()); | |
| 4861 DCHECK_LE(i, alternatives->length()); | |
| 4862 DCHECK_LE(first_atom, i); | |
| 4863 alternatives->StableSort(CompareFirstChar, first_atom, i - first_atom); | |
| 4864 } | |
| 4865 if (i - first_atom > 1) found_consecutive_atoms = true; | |
| 4866 } | |
| 4867 return found_consecutive_atoms; | |
| 4868 } | |
| 4869 | |
| 4870 | |
| 4871 // Optimizes ab|ac|az to a(?:b|c|d). | |
| 4872 void RegExpDisjunction::RationalizeConsecutiveAtoms(RegExpCompiler* compiler) { | |
| 4873 Zone* zone = compiler->zone(); | |
| 4874 ZoneList<RegExpTree*>* alternatives = this->alternatives(); | |
| 4875 int length = alternatives->length(); | |
| 4876 | |
| 4877 int write_posn = 0; | |
| 4878 int i = 0; | |
| 4879 while (i < length) { | |
| 4880 RegExpTree* alternative = alternatives->at(i); | |
| 4881 if (!alternative->IsAtom()) { | |
| 4882 alternatives->at(write_posn++) = alternatives->at(i); | |
| 4883 i++; | |
| 4884 continue; | |
| 4885 } | |
| 4886 RegExpAtom* atom = alternative->AsAtom(); | |
| 4887 uc16 common_prefix = atom->data().at(0); | |
| 4888 int first_with_prefix = i; | |
| 4889 int prefix_length = atom->length(); | |
| 4890 i++; | |
| 4891 while (i < length) { | |
| 4892 alternative = alternatives->at(i); | |
| 4893 if (!alternative->IsAtom()) break; | |
| 4894 atom = alternative->AsAtom(); | |
| 4895 if (atom->data().at(0) != common_prefix) break; | |
| 4896 prefix_length = Min(prefix_length, atom->length()); | |
| 4897 i++; | |
| 4898 } | |
| 4899 if (i > first_with_prefix + 2) { | |
| 4900 // Found worthwhile run of alternatives with common prefix of at least one | |
| 4901 // character. The sorting function above did not sort on more than one | |
| 4902 // character for reasons of correctness, but there may still be a longer | |
| 4903 // common prefix if the terms were similar or presorted in the input. | |
| 4904 // Find out how long the common prefix is. | |
| 4905 int run_length = i - first_with_prefix; | |
| 4906 atom = alternatives->at(first_with_prefix)->AsAtom(); | |
| 4907 for (int j = 1; j < run_length && prefix_length > 1; j++) { | |
| 4908 RegExpAtom* old_atom = | |
| 4909 alternatives->at(j + first_with_prefix)->AsAtom(); | |
| 4910 for (int k = 1; k < prefix_length; k++) { | |
| 4911 if (atom->data().at(k) != old_atom->data().at(k)) prefix_length = k; | |
| 4912 } | |
| 4913 } | |
| 4914 RegExpAtom* prefix = | |
| 4915 new (zone) RegExpAtom(atom->data().SubVector(0, prefix_length)); | |
| 4916 ZoneList<RegExpTree*>* pair = new (zone) ZoneList<RegExpTree*>(2, zone); | |
| 4917 pair->Add(prefix, zone); | |
| 4918 ZoneList<RegExpTree*>* suffixes = | |
| 4919 new (zone) ZoneList<RegExpTree*>(run_length, zone); | |
| 4920 for (int j = 0; j < run_length; j++) { | |
| 4921 RegExpAtom* old_atom = | |
| 4922 alternatives->at(j + first_with_prefix)->AsAtom(); | |
| 4923 int len = old_atom->length(); | |
| 4924 if (len == prefix_length) { | |
| 4925 suffixes->Add(new (zone) RegExpEmpty(), zone); | |
| 4926 } else { | |
| 4927 RegExpTree* suffix = new (zone) RegExpAtom( | |
| 4928 old_atom->data().SubVector(prefix_length, old_atom->length())); | |
| 4929 suffixes->Add(suffix, zone); | |
| 4930 } | |
| 4931 } | |
| 4932 pair->Add(new (zone) RegExpDisjunction(suffixes), zone); | |
| 4933 alternatives->at(write_posn++) = new (zone) RegExpAlternative(pair); | |
| 4934 } else { | |
| 4935 // Just copy any non-worthwhile alternatives. | |
| 4936 for (int j = first_with_prefix; j < i; j++) { | |
| 4937 alternatives->at(write_posn++) = alternatives->at(j); | |
| 4938 } | |
| 4939 } | |
| 4940 } | |
| 4941 alternatives->Rewind(write_posn); // Trim end of array. | |
| 4942 } | |
| 4943 | |
| 4944 | |
| 4945 // Optimizes b|c|z to [bcz]. | |
| 4946 void RegExpDisjunction::FixSingleCharacterDisjunctions( | |
| 4947 RegExpCompiler* compiler) { | |
| 4948 Zone* zone = compiler->zone(); | |
| 4949 ZoneList<RegExpTree*>* alternatives = this->alternatives(); | |
| 4950 int length = alternatives->length(); | |
| 4951 | |
| 4952 int write_posn = 0; | |
| 4953 int i = 0; | |
| 4954 while (i < length) { | |
| 4955 RegExpTree* alternative = alternatives->at(i); | |
| 4956 if (!alternative->IsAtom()) { | |
| 4957 alternatives->at(write_posn++) = alternatives->at(i); | |
| 4958 i++; | |
| 4959 continue; | |
| 4960 } | |
| 4961 RegExpAtom* atom = alternative->AsAtom(); | |
| 4962 if (atom->length() != 1) { | |
| 4963 alternatives->at(write_posn++) = alternatives->at(i); | |
| 4964 i++; | |
| 4965 continue; | |
| 4966 } | |
| 4967 int first_in_run = i; | |
| 4968 i++; | |
| 4969 while (i < length) { | |
| 4970 alternative = alternatives->at(i); | |
| 4971 if (!alternative->IsAtom()) break; | |
| 4972 atom = alternative->AsAtom(); | |
| 4973 if (atom->length() != 1) break; | |
| 4974 i++; | |
| 4975 } | |
| 4976 if (i > first_in_run + 1) { | |
| 4977 // Found non-trivial run of single-character alternatives. | |
| 4978 int run_length = i - first_in_run; | |
| 4979 ZoneList<CharacterRange>* ranges = | |
| 4980 new (zone) ZoneList<CharacterRange>(2, zone); | |
| 4981 for (int j = 0; j < run_length; j++) { | |
| 4982 RegExpAtom* old_atom = alternatives->at(j + first_in_run)->AsAtom(); | |
| 4983 DCHECK_EQ(old_atom->length(), 1); | |
| 4984 ranges->Add(CharacterRange::Singleton(old_atom->data().at(0)), zone); | |
| 4985 } | |
| 4986 alternatives->at(write_posn++) = | |
| 4987 new (zone) RegExpCharacterClass(ranges, false); | |
| 4988 } else { | |
| 4989 // Just copy any trivial alternatives. | |
| 4990 for (int j = first_in_run; j < i; j++) { | |
| 4991 alternatives->at(write_posn++) = alternatives->at(j); | |
| 4992 } | |
| 4993 } | |
| 4994 } | |
| 4995 alternatives->Rewind(write_posn); // Trim end of array. | |
| 4996 } | |
| 4997 | |
| 4998 | |
| 4820 RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler, | 4999 RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler, |
| 4821 RegExpNode* on_success) { | 5000 RegExpNode* on_success) { |
| 4822 ZoneList<RegExpTree*>* alternatives = this->alternatives(); | 5001 ZoneList<RegExpTree*>* alternatives = this->alternatives(); |
| 4823 int length = alternatives->length(); | 5002 |
| 5003 if (alternatives->length() > 2) { | |
| 5004 bool found_consecutive_atoms = SortConsecutiveAtoms(compiler); | |
| 5005 if (found_consecutive_atoms) RationalizeConsecutiveAtoms(compiler); | |
| 5006 FixSingleCharacterDisjunctions(compiler); | |
| 5007 if (alternatives->length() == 1) { | |
| 5008 return alternatives->at(0)->ToNode(compiler, on_success); | |
| 5009 } | |
| 5010 } | |
| 5011 | |
| 5012 int length = alternatives->length(); | |
| 5013 | |
| 4824 ChoiceNode* result = | 5014 ChoiceNode* result = |
| 4825 new(compiler->zone()) ChoiceNode(length, compiler->zone()); | 5015 new(compiler->zone()) ChoiceNode(length, compiler->zone()); |
| 4826 for (int i = 0; i < length; i++) { | 5016 for (int i = 0; i < length; i++) { |
| 4827 GuardedAlternative alternative(alternatives->at(i)->ToNode(compiler, | 5017 GuardedAlternative alternative(alternatives->at(i)->ToNode(compiler, |
| 4828 on_success)); | 5018 on_success)); |
| 4829 result->AddAlternative(alternative); | 5019 result->AddAlternative(alternative); |
| 4830 } | 5020 } |
| 4831 return result; | 5021 return result; |
| 4832 } | 5022 } |
| 4833 | 5023 |
| (...skipping 1322 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 6156 bool too_much = pattern->length() > RegExpImpl::kRegExpTooLargeToOptimize; | 6346 bool too_much = pattern->length() > RegExpImpl::kRegExpTooLargeToOptimize; |
| 6157 if (heap->total_regexp_code_generated() > RegExpImpl::kRegExpCompiledLimit && | 6347 if (heap->total_regexp_code_generated() > RegExpImpl::kRegExpCompiledLimit && |
| 6158 heap->isolate()->memory_allocator()->SizeExecutable() > | 6348 heap->isolate()->memory_allocator()->SizeExecutable() > |
| 6159 RegExpImpl::kRegExpExecutableMemoryLimit) { | 6349 RegExpImpl::kRegExpExecutableMemoryLimit) { |
| 6160 too_much = true; | 6350 too_much = true; |
| 6161 } | 6351 } |
| 6162 return too_much; | 6352 return too_much; |
| 6163 } | 6353 } |
| 6164 } // namespace internal | 6354 } // namespace internal |
| 6165 } // namespace v8 | 6355 } // namespace v8 |
| OLD | NEW |