Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
| 4 | 4 |
| 5 #include "vm/regexp.h" | 5 #include "vm/regexp.h" |
| 6 | 6 |
| 7 #include "vm/dart_entry.h" | 7 #include "vm/dart_entry.h" |
| 8 #include "vm/regexp_assembler.h" | 8 #include "vm/regexp_assembler.h" |
| 9 #include "vm/regexp_assembler_bytecode.h" | 9 #include "vm/regexp_assembler_bytecode.h" |
| 10 #include "vm/regexp_assembler_ir.h" | 10 #include "vm/regexp_assembler_ir.h" |
| (...skipping 4806 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 4817 | 4817 |
| 4818 RegExpEngine::CompilationResult RegExpEngine::CompileIR( | 4818 RegExpEngine::CompilationResult RegExpEngine::CompileIR( |
| 4819 RegExpCompileData* data, | 4819 RegExpCompileData* data, |
| 4820 const ParsedFunction* parsed_function, | 4820 const ParsedFunction* parsed_function, |
| 4821 const ZoneGrowableArray<const ICData*>& ic_data_array) { | 4821 const ZoneGrowableArray<const ICData*>& ic_data_array) { |
| 4822 ASSERT(!FLAG_interpret_irregexp); | 4822 ASSERT(!FLAG_interpret_irregexp); |
| 4823 Zone* zone = Thread::Current()->zone(); | 4823 Zone* zone = Thread::Current()->zone(); |
| 4824 | 4824 |
| 4825 const Function& function = parsed_function->function(); | 4825 const Function& function = parsed_function->function(); |
| 4826 const intptr_t specialization_cid = function.string_specialization_cid(); | 4826 const intptr_t specialization_cid = function.string_specialization_cid(); |
| 4827 const intptr_t is_sticky = function.is_sticky_specialization(); | |
| 4827 const bool is_one_byte = (specialization_cid == kOneByteStringCid || | 4828 const bool is_one_byte = (specialization_cid == kOneByteStringCid || |
| 4828 specialization_cid == kExternalOneByteStringCid); | 4829 specialization_cid == kExternalOneByteStringCid); |
| 4829 RegExp& regexp = RegExp::Handle(zone, function.regexp()); | 4830 RegExp& regexp = RegExp::Handle(zone, function.regexp()); |
| 4830 const String& pattern = String::Handle(zone, regexp.pattern()); | 4831 const String& pattern = String::Handle(zone, regexp.pattern()); |
| 4831 | 4832 |
| 4832 ASSERT(!regexp.IsNull()); | 4833 ASSERT(!regexp.IsNull()); |
| 4833 ASSERT(!pattern.IsNull()); | 4834 ASSERT(!pattern.IsNull()); |
| 4834 | 4835 |
| 4835 const bool ignore_case = regexp.is_ignore_case(); | 4836 const bool ignore_case = regexp.is_ignore_case(); |
| 4836 const bool is_global = regexp.is_global(); | 4837 const bool is_global = regexp.is_global(); |
| 4837 | 4838 |
| 4838 RegExpCompiler compiler(data->capture_count, ignore_case, is_one_byte); | 4839 RegExpCompiler compiler(data->capture_count, ignore_case, is_one_byte); |
| 4839 | 4840 |
| 4840 // TODO(zerny): Frequency sampling is currently disabled because of several | 4841 // TODO(zerny): Frequency sampling is currently disabled because of several |
| 4841 // issues. We do not want to store subject strings in the regexp object since | 4842 // issues. We do not want to store subject strings in the regexp object since |
| 4842 // they might be long and we should not prevent their garbage collection. | 4843 // they might be long and we should not prevent their garbage collection. |
| 4843 // Passing them to this function explicitly does not help, since we must | 4844 // Passing them to this function explicitly does not help, since we must |
| 4844 // generate exactly the same IR for both the unoptimizing and optimizing | 4845 // generate exactly the same IR for both the unoptimizing and optimizing |
| 4845 // pipelines (otherwise it gets confused when i.e. deopt id's differ). | 4846 // pipelines (otherwise it gets confused when i.e. deopt id's differ). |
| 4846 // An option would be to store sampling results in the regexp object, but | 4847 // An option would be to store sampling results in the regexp object, but |
| 4847 // I'm not sure the performance gains are relevant enough. | 4848 // I'm not sure the performance gains are relevant enough. |
| 4848 | 4849 |
| 4849 // Wrap the body of the regexp in capture #0. | 4850 // Wrap the body of the regexp in capture #0. |
| 4850 RegExpNode* captured_body = | 4851 RegExpNode* captured_body = |
| 4851 RegExpCapture::ToNode(data->tree, 0, &compiler, compiler.accept()); | 4852 RegExpCapture::ToNode(data->tree, 0, &compiler, compiler.accept()); |
| 4852 | 4853 |
| 4853 RegExpNode* node = captured_body; | 4854 RegExpNode* node = captured_body; |
| 4854 bool is_end_anchored = data->tree->IsAnchoredAtEnd(); | 4855 const bool is_end_anchored = data->tree->IsAnchoredAtEnd(); |
| 4855 bool is_start_anchored = data->tree->IsAnchoredAtStart(); | 4856 const bool is_start_anchored = data->tree->IsAnchoredAtStart(); |
| 4856 intptr_t max_length = data->tree->max_match(); | 4857 intptr_t max_length = data->tree->max_match(); |
| 4857 if (!is_start_anchored) { | 4858 if (!is_start_anchored && !is_sticky) { |
| 4858 // Add a .*? at the beginning, outside the body capture, unless | 4859 // Add a .*? at the beginning, outside the body capture, unless |
|
rmacnak
2016/11/17 00:49:52
Not sure I see why this property is called sticky,
erikcorry
2016/11/17 13:56:17
From es-discuss (!):
I believe the y actually com
| |
| 4859 // this expression is anchored at the beginning. | 4860 // this expression is anchored at the beginning or is sticky. |
| 4860 RegExpNode* loop_node = RegExpQuantifier::ToNode( | 4861 RegExpNode* loop_node = RegExpQuantifier::ToNode( |
| 4861 0, RegExpTree::kInfinity, false, new (zone) RegExpCharacterClass('*'), | 4862 0, RegExpTree::kInfinity, false, new (zone) RegExpCharacterClass('*'), |
| 4862 &compiler, captured_body, data->contains_anchor); | 4863 &compiler, captured_body, data->contains_anchor); |
| 4863 | 4864 |
| 4864 if (data->contains_anchor) { | 4865 if (data->contains_anchor) { |
| 4865 // Unroll loop once, to take care of the case that might start | 4866 // Unroll loop once, to take care of the case that might start |
| 4866 // at the start of input. | 4867 // at the start of input. |
| 4867 ChoiceNode* first_step_node = new (zone) ChoiceNode(2, zone); | 4868 ChoiceNode* first_step_node = new (zone) ChoiceNode(2, zone); |
| 4868 first_step_node->AddAlternative(GuardedAlternative(captured_body)); | 4869 first_step_node->AddAlternative(GuardedAlternative(captured_body)); |
| 4869 first_step_node->AddAlternative(GuardedAlternative(new (zone) TextNode( | 4870 first_step_node->AddAlternative(GuardedAlternative(new (zone) TextNode( |
| (...skipping 25 matching lines...) Expand all Loading... | |
| 4895 | 4896 |
| 4896 IRRegExpMacroAssembler* macro_assembler = | 4897 IRRegExpMacroAssembler* macro_assembler = |
| 4897 new (zone) IRRegExpMacroAssembler(specialization_cid, data->capture_count, | 4898 new (zone) IRRegExpMacroAssembler(specialization_cid, data->capture_count, |
| 4898 parsed_function, ic_data_array, zone); | 4899 parsed_function, ic_data_array, zone); |
| 4899 | 4900 |
| 4900 // Inserted here, instead of in Assembler, because it depends on information | 4901 // Inserted here, instead of in Assembler, because it depends on information |
| 4901 // in the AST that isn't replicated in the Node structure. | 4902 // in the AST that isn't replicated in the Node structure. |
| 4902 static const intptr_t kMaxBacksearchLimit = 1024; | 4903 static const intptr_t kMaxBacksearchLimit = 1024; |
| 4903 if (is_end_anchored && !is_start_anchored && | 4904 if (is_end_anchored && !is_start_anchored && |
| 4904 max_length < kMaxBacksearchLimit) { | 4905 max_length < kMaxBacksearchLimit) { |
| 4905 macro_assembler->SetCurrentPositionFromEnd(max_length); | 4906 macro_assembler->SetCurrentPositionFromEnd(max_length); |
|
erikcorry
2016/11/17 13:56:17
This needs fixing here too and we need a test for
Vyacheslav Egorov (Google)
2016/11/17 16:51:33
As we discussed I have fixed it here and I will po
| |
| 4906 } | 4907 } |
| 4907 | 4908 |
| 4908 if (is_global) { | 4909 if (is_global) { |
| 4909 macro_assembler->set_global_mode( | 4910 macro_assembler->set_global_mode( |
| 4910 (data->tree->min_match() > 0) | 4911 (data->tree->min_match() > 0) |
| 4911 ? RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK | 4912 ? RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK |
| 4912 : RegExpMacroAssembler::GLOBAL); | 4913 : RegExpMacroAssembler::GLOBAL); |
| 4913 } | 4914 } |
| 4914 | 4915 |
| 4915 RegExpEngine::CompilationResult result = | 4916 RegExpEngine::CompilationResult result = |
| 4916 compiler.Assemble(macro_assembler, node, data->capture_count, pattern); | 4917 compiler.Assemble(macro_assembler, node, data->capture_count, pattern); |
| 4917 | 4918 |
| 4918 if (FLAG_trace_irregexp) { | 4919 if (FLAG_trace_irregexp) { |
| 4919 macro_assembler->PrintBlocks(); | 4920 macro_assembler->PrintBlocks(); |
| 4920 } | 4921 } |
| 4921 | 4922 |
| 4922 return result; | 4923 return result; |
| 4923 } | 4924 } |
| 4924 | 4925 |
| 4925 | 4926 |
| 4926 RegExpEngine::CompilationResult RegExpEngine::CompileBytecode( | 4927 RegExpEngine::CompilationResult RegExpEngine::CompileBytecode( |
| 4927 RegExpCompileData* data, | 4928 RegExpCompileData* data, |
| 4928 const RegExp& regexp, | 4929 const RegExp& regexp, |
| 4929 bool is_one_byte, | 4930 bool is_one_byte, |
| 4931 bool is_sticky, | |
| 4930 Zone* zone) { | 4932 Zone* zone) { |
| 4931 ASSERT(FLAG_interpret_irregexp); | 4933 ASSERT(FLAG_interpret_irregexp); |
| 4932 const String& pattern = String::Handle(zone, regexp.pattern()); | 4934 const String& pattern = String::Handle(zone, regexp.pattern()); |
| 4933 | 4935 |
| 4934 ASSERT(!regexp.IsNull()); | 4936 ASSERT(!regexp.IsNull()); |
| 4935 ASSERT(!pattern.IsNull()); | 4937 ASSERT(!pattern.IsNull()); |
| 4936 | 4938 |
| 4937 const bool ignore_case = regexp.is_ignore_case(); | 4939 const bool ignore_case = regexp.is_ignore_case(); |
| 4938 const bool is_global = regexp.is_global(); | 4940 const bool is_global = regexp.is_global(); |
| 4939 | 4941 |
| 4940 RegExpCompiler compiler(data->capture_count, ignore_case, is_one_byte); | 4942 RegExpCompiler compiler(data->capture_count, ignore_case, is_one_byte); |
| 4941 | 4943 |
| 4942 // TODO(zerny): Frequency sampling is currently disabled because of several | 4944 // TODO(zerny): Frequency sampling is currently disabled because of several |
| 4943 // issues. We do not want to store subject strings in the regexp object since | 4945 // issues. We do not want to store subject strings in the regexp object since |
| 4944 // they might be long and we should not prevent their garbage collection. | 4946 // they might be long and we should not prevent their garbage collection. |
| 4945 // Passing them to this function explicitly does not help, since we must | 4947 // Passing them to this function explicitly does not help, since we must |
| 4946 // generate exactly the same IR for both the unoptimizing and optimizing | 4948 // generate exactly the same IR for both the unoptimizing and optimizing |
| 4947 // pipelines (otherwise it gets confused when i.e. deopt id's differ). | 4949 // pipelines (otherwise it gets confused when i.e. deopt id's differ). |
| 4948 // An option would be to store sampling results in the regexp object, but | 4950 // An option would be to store sampling results in the regexp object, but |
| 4949 // I'm not sure the performance gains are relevant enough. | 4951 // I'm not sure the performance gains are relevant enough. |
| 4950 | 4952 |
| 4951 // Wrap the body of the regexp in capture #0. | 4953 // Wrap the body of the regexp in capture #0. |
| 4952 RegExpNode* captured_body = | 4954 RegExpNode* captured_body = |
| 4953 RegExpCapture::ToNode(data->tree, 0, &compiler, compiler.accept()); | 4955 RegExpCapture::ToNode(data->tree, 0, &compiler, compiler.accept()); |
| 4954 | 4956 |
| 4955 RegExpNode* node = captured_body; | 4957 RegExpNode* node = captured_body; |
| 4956 bool is_end_anchored = data->tree->IsAnchoredAtEnd(); | 4958 bool is_end_anchored = data->tree->IsAnchoredAtEnd(); |
| 4957 bool is_start_anchored = data->tree->IsAnchoredAtStart(); | 4959 bool is_start_anchored = data->tree->IsAnchoredAtStart(); |
| 4958 intptr_t max_length = data->tree->max_match(); | 4960 intptr_t max_length = data->tree->max_match(); |
| 4959 if (!is_start_anchored) { | 4961 if (!is_start_anchored && !is_sticky) { |
| 4960 // Add a .*? at the beginning, outside the body capture, unless | 4962 // Add a .*? at the beginning, outside the body capture, unless |
| 4961 // this expression is anchored at the beginning. | 4963 // this expression is anchored at the beginning. |
| 4962 RegExpNode* loop_node = RegExpQuantifier::ToNode( | 4964 RegExpNode* loop_node = RegExpQuantifier::ToNode( |
| 4963 0, RegExpTree::kInfinity, false, new (zone) RegExpCharacterClass('*'), | 4965 0, RegExpTree::kInfinity, false, new (zone) RegExpCharacterClass('*'), |
| 4964 &compiler, captured_body, data->contains_anchor); | 4966 &compiler, captured_body, data->contains_anchor); |
| 4965 | 4967 |
| 4966 if (data->contains_anchor) { | 4968 if (data->contains_anchor) { |
| 4967 // Unroll loop once, to take care of the case that might start | 4969 // Unroll loop once, to take care of the case that might start |
| 4968 // at the start of input. | 4970 // at the start of input. |
| 4969 ChoiceNode* first_step_node = new (zone) ChoiceNode(2, zone); | 4971 ChoiceNode* first_step_node = new (zone) ChoiceNode(2, zone); |
| (...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 5022 } | 5024 } |
| 5023 | 5025 |
| 5024 return result; | 5026 return result; |
| 5025 } | 5027 } |
| 5026 | 5028 |
| 5027 | 5029 |
| 5028 static void CreateSpecializedFunction(Thread* thread, | 5030 static void CreateSpecializedFunction(Thread* thread, |
| 5029 Zone* zone, | 5031 Zone* zone, |
| 5030 const RegExp& regexp, | 5032 const RegExp& regexp, |
| 5031 intptr_t specialization_cid, | 5033 intptr_t specialization_cid, |
| 5034 bool sticky, | |
| 5032 const Object& owner) { | 5035 const Object& owner) { |
| 5033 const intptr_t kParamCount = RegExpMacroAssembler::kParamCount; | 5036 const intptr_t kParamCount = RegExpMacroAssembler::kParamCount; |
| 5034 | 5037 |
| 5035 Function& fn = | 5038 Function& fn = |
| 5036 Function::Handle(zone, Function::New(Symbols::ColonMatcher(), | 5039 Function::Handle(zone, Function::New(Symbols::ColonMatcher(), |
| 5037 RawFunction::kIrregexpFunction, | 5040 RawFunction::kIrregexpFunction, |
| 5038 true, // Static. | 5041 true, // Static. |
| 5039 false, // Not const. | 5042 false, // Not const. |
| 5040 false, // Not abstract. | 5043 false, // Not abstract. |
| 5041 false, // Not external. | 5044 false, // Not external. |
| (...skipping 14 matching lines...) Expand all Loading... | |
| 5056 Object::dynamic_type()); | 5059 Object::dynamic_type()); |
| 5057 fn.SetParameterNameAt(RegExpMacroAssembler::kParamStringIndex, | 5060 fn.SetParameterNameAt(RegExpMacroAssembler::kParamStringIndex, |
| 5058 Symbols::string_param()); | 5061 Symbols::string_param()); |
| 5059 fn.SetParameterTypeAt(RegExpMacroAssembler::kParamStartOffsetIndex, | 5062 fn.SetParameterTypeAt(RegExpMacroAssembler::kParamStartOffsetIndex, |
| 5060 Object::dynamic_type()); | 5063 Object::dynamic_type()); |
| 5061 fn.SetParameterNameAt(RegExpMacroAssembler::kParamStartOffsetIndex, | 5064 fn.SetParameterNameAt(RegExpMacroAssembler::kParamStartOffsetIndex, |
| 5062 Symbols::start_index_param()); | 5065 Symbols::start_index_param()); |
| 5063 fn.set_result_type(Type::Handle(zone, Type::ArrayType())); | 5066 fn.set_result_type(Type::Handle(zone, Type::ArrayType())); |
| 5064 | 5067 |
| 5065 // Cache the result. | 5068 // Cache the result. |
| 5066 regexp.set_function(specialization_cid, fn); | 5069 regexp.set_function(specialization_cid, sticky, fn); |
| 5067 | 5070 |
| 5068 fn.SetRegExpData(regexp, specialization_cid); | 5071 fn.SetRegExpData(regexp, specialization_cid, sticky); |
| 5069 fn.set_is_debuggable(false); | 5072 fn.set_is_debuggable(false); |
| 5070 | 5073 |
| 5071 // The function is compiled lazily during the first call. | 5074 // The function is compiled lazily during the first call. |
| 5072 } | 5075 } |
| 5073 | 5076 |
| 5074 | 5077 |
| 5075 RawRegExp* RegExpEngine::CreateRegExp(Thread* thread, | 5078 RawRegExp* RegExpEngine::CreateRegExp(Thread* thread, |
| 5076 const String& pattern, | 5079 const String& pattern, |
| 5077 bool multi_line, | 5080 bool multi_line, |
| 5078 bool ignore_case) { | 5081 bool ignore_case) { |
| (...skipping 12 matching lines...) Expand all Loading... | |
| 5091 // TODO(zerny): We might want to use normal string searching algorithms | 5094 // TODO(zerny): We might want to use normal string searching algorithms |
| 5092 // for simple patterns. | 5095 // for simple patterns. |
| 5093 regexp.set_is_complex(); | 5096 regexp.set_is_complex(); |
| 5094 regexp.set_is_global(); // All dart regexps are global. | 5097 regexp.set_is_global(); // All dart regexps are global. |
| 5095 | 5098 |
| 5096 if (!FLAG_interpret_irregexp) { | 5099 if (!FLAG_interpret_irregexp) { |
| 5097 const Library& lib = Library::Handle(zone, Library::CoreLibrary()); | 5100 const Library& lib = Library::Handle(zone, Library::CoreLibrary()); |
| 5098 const Class& owner = | 5101 const Class& owner = |
| 5099 Class::Handle(zone, lib.LookupClass(Symbols::RegExp())); | 5102 Class::Handle(zone, lib.LookupClass(Symbols::RegExp())); |
| 5100 | 5103 |
| 5101 CreateSpecializedFunction(thread, zone, regexp, kOneByteStringCid, owner); | 5104 for (intptr_t cid = kOneByteStringCid; cid <= kExternalTwoByteStringCid; |
| 5102 CreateSpecializedFunction(thread, zone, regexp, kTwoByteStringCid, owner); | 5105 cid++) { |
| 5103 CreateSpecializedFunction(thread, zone, regexp, kExternalOneByteStringCid, | 5106 CreateSpecializedFunction(thread, zone, regexp, cid, /*sticky=*/false, |
| 5104 owner); | 5107 owner); |
| 5105 CreateSpecializedFunction(thread, zone, regexp, kExternalTwoByteStringCid, | 5108 CreateSpecializedFunction(thread, zone, regexp, cid, /*sticky=*/true, |
| 5106 owner); | 5109 owner); |
| 5110 } | |
| 5107 } | 5111 } |
| 5108 | 5112 |
| 5109 return regexp.raw(); | 5113 return regexp.raw(); |
| 5110 } | 5114 } |
| 5111 | 5115 |
| 5112 | 5116 |
| 5113 } // namespace dart | 5117 } // namespace dart |
| OLD | NEW |