OLD | NEW |
---|---|
1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 #include "vm/regexp.h" | 5 #include "vm/regexp.h" |
6 | 6 |
7 #include "vm/dart_entry.h" | 7 #include "vm/dart_entry.h" |
8 #include "vm/regexp_assembler.h" | 8 #include "vm/regexp_assembler.h" |
9 #include "vm/regexp_assembler_bytecode.h" | 9 #include "vm/regexp_assembler_bytecode.h" |
10 #include "vm/regexp_assembler_ir.h" | 10 #include "vm/regexp_assembler_ir.h" |
(...skipping 4806 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
4817 | 4817 |
4818 RegExpEngine::CompilationResult RegExpEngine::CompileIR( | 4818 RegExpEngine::CompilationResult RegExpEngine::CompileIR( |
4819 RegExpCompileData* data, | 4819 RegExpCompileData* data, |
4820 const ParsedFunction* parsed_function, | 4820 const ParsedFunction* parsed_function, |
4821 const ZoneGrowableArray<const ICData*>& ic_data_array) { | 4821 const ZoneGrowableArray<const ICData*>& ic_data_array) { |
4822 ASSERT(!FLAG_interpret_irregexp); | 4822 ASSERT(!FLAG_interpret_irregexp); |
4823 Zone* zone = Thread::Current()->zone(); | 4823 Zone* zone = Thread::Current()->zone(); |
4824 | 4824 |
4825 const Function& function = parsed_function->function(); | 4825 const Function& function = parsed_function->function(); |
4826 const intptr_t specialization_cid = function.string_specialization_cid(); | 4826 const intptr_t specialization_cid = function.string_specialization_cid(); |
4827 const intptr_t is_sticky = function.is_sticky_specialization(); | |
4827 const bool is_one_byte = (specialization_cid == kOneByteStringCid || | 4828 const bool is_one_byte = (specialization_cid == kOneByteStringCid || |
4828 specialization_cid == kExternalOneByteStringCid); | 4829 specialization_cid == kExternalOneByteStringCid); |
4829 RegExp& regexp = RegExp::Handle(zone, function.regexp()); | 4830 RegExp& regexp = RegExp::Handle(zone, function.regexp()); |
4830 const String& pattern = String::Handle(zone, regexp.pattern()); | 4831 const String& pattern = String::Handle(zone, regexp.pattern()); |
4831 | 4832 |
4832 ASSERT(!regexp.IsNull()); | 4833 ASSERT(!regexp.IsNull()); |
4833 ASSERT(!pattern.IsNull()); | 4834 ASSERT(!pattern.IsNull()); |
4834 | 4835 |
4835 const bool ignore_case = regexp.is_ignore_case(); | 4836 const bool ignore_case = regexp.is_ignore_case(); |
4836 const bool is_global = regexp.is_global(); | 4837 const bool is_global = regexp.is_global(); |
4837 | 4838 |
4838 RegExpCompiler compiler(data->capture_count, ignore_case, is_one_byte); | 4839 RegExpCompiler compiler(data->capture_count, ignore_case, is_one_byte); |
4839 | 4840 |
4840 // TODO(zerny): Frequency sampling is currently disabled because of several | 4841 // TODO(zerny): Frequency sampling is currently disabled because of several |
4841 // issues. We do not want to store subject strings in the regexp object since | 4842 // issues. We do not want to store subject strings in the regexp object since |
4842 // they might be long and we should not prevent their garbage collection. | 4843 // they might be long and we should not prevent their garbage collection. |
4843 // Passing them to this function explicitly does not help, since we must | 4844 // Passing them to this function explicitly does not help, since we must |
4844 // generate exactly the same IR for both the unoptimizing and optimizing | 4845 // generate exactly the same IR for both the unoptimizing and optimizing |
4845 // pipelines (otherwise it gets confused when i.e. deopt id's differ). | 4846 // pipelines (otherwise it gets confused when i.e. deopt id's differ). |
4846 // An option would be to store sampling results in the regexp object, but | 4847 // An option would be to store sampling results in the regexp object, but |
4847 // I'm not sure the performance gains are relevant enough. | 4848 // I'm not sure the performance gains are relevant enough. |
4848 | 4849 |
4849 // Wrap the body of the regexp in capture #0. | 4850 // Wrap the body of the regexp in capture #0. |
4850 RegExpNode* captured_body = | 4851 RegExpNode* captured_body = |
4851 RegExpCapture::ToNode(data->tree, 0, &compiler, compiler.accept()); | 4852 RegExpCapture::ToNode(data->tree, 0, &compiler, compiler.accept()); |
4852 | 4853 |
4853 RegExpNode* node = captured_body; | 4854 RegExpNode* node = captured_body; |
4854 bool is_end_anchored = data->tree->IsAnchoredAtEnd(); | 4855 const bool is_end_anchored = data->tree->IsAnchoredAtEnd(); |
4855 bool is_start_anchored = data->tree->IsAnchoredAtStart(); | 4856 const bool is_start_anchored = data->tree->IsAnchoredAtStart(); |
4856 intptr_t max_length = data->tree->max_match(); | 4857 intptr_t max_length = data->tree->max_match(); |
4857 if (!is_start_anchored) { | 4858 if (!is_start_anchored && !is_sticky) { |
4858 // Add a .*? at the beginning, outside the body capture, unless | 4859 // Add a .*? at the beginning, outside the body capture, unless |
rmacnak
2016/11/17 00:49:52
Not sure I see why this property is called sticky,
erikcorry
2016/11/17 13:56:17
From es-discuss (!):
I believe the y actually com
| |
4859 // this expression is anchored at the beginning. | 4860 // this expression is anchored at the beginning or is sticky. |
4860 RegExpNode* loop_node = RegExpQuantifier::ToNode( | 4861 RegExpNode* loop_node = RegExpQuantifier::ToNode( |
4861 0, RegExpTree::kInfinity, false, new (zone) RegExpCharacterClass('*'), | 4862 0, RegExpTree::kInfinity, false, new (zone) RegExpCharacterClass('*'), |
4862 &compiler, captured_body, data->contains_anchor); | 4863 &compiler, captured_body, data->contains_anchor); |
4863 | 4864 |
4864 if (data->contains_anchor) { | 4865 if (data->contains_anchor) { |
4865 // Unroll loop once, to take care of the case that might start | 4866 // Unroll loop once, to take care of the case that might start |
4866 // at the start of input. | 4867 // at the start of input. |
4867 ChoiceNode* first_step_node = new (zone) ChoiceNode(2, zone); | 4868 ChoiceNode* first_step_node = new (zone) ChoiceNode(2, zone); |
4868 first_step_node->AddAlternative(GuardedAlternative(captured_body)); | 4869 first_step_node->AddAlternative(GuardedAlternative(captured_body)); |
4869 first_step_node->AddAlternative(GuardedAlternative(new (zone) TextNode( | 4870 first_step_node->AddAlternative(GuardedAlternative(new (zone) TextNode( |
(...skipping 25 matching lines...) Expand all Loading... | |
4895 | 4896 |
4896 IRRegExpMacroAssembler* macro_assembler = | 4897 IRRegExpMacroAssembler* macro_assembler = |
4897 new (zone) IRRegExpMacroAssembler(specialization_cid, data->capture_count, | 4898 new (zone) IRRegExpMacroAssembler(specialization_cid, data->capture_count, |
4898 parsed_function, ic_data_array, zone); | 4899 parsed_function, ic_data_array, zone); |
4899 | 4900 |
4900 // Inserted here, instead of in Assembler, because it depends on information | 4901 // Inserted here, instead of in Assembler, because it depends on information |
4901 // in the AST that isn't replicated in the Node structure. | 4902 // in the AST that isn't replicated in the Node structure. |
4902 static const intptr_t kMaxBacksearchLimit = 1024; | 4903 static const intptr_t kMaxBacksearchLimit = 1024; |
4903 if (is_end_anchored && !is_start_anchored && | 4904 if (is_end_anchored && !is_start_anchored && |
4904 max_length < kMaxBacksearchLimit) { | 4905 max_length < kMaxBacksearchLimit) { |
4905 macro_assembler->SetCurrentPositionFromEnd(max_length); | 4906 macro_assembler->SetCurrentPositionFromEnd(max_length); |
erikcorry
2016/11/17 13:56:17
This needs fixing here too and we need a test for
Vyacheslav Egorov (Google)
2016/11/17 16:51:33
As we discussed I have fixed it here and I will po
| |
4906 } | 4907 } |
4907 | 4908 |
4908 if (is_global) { | 4909 if (is_global) { |
4909 macro_assembler->set_global_mode( | 4910 macro_assembler->set_global_mode( |
4910 (data->tree->min_match() > 0) | 4911 (data->tree->min_match() > 0) |
4911 ? RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK | 4912 ? RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK |
4912 : RegExpMacroAssembler::GLOBAL); | 4913 : RegExpMacroAssembler::GLOBAL); |
4913 } | 4914 } |
4914 | 4915 |
4915 RegExpEngine::CompilationResult result = | 4916 RegExpEngine::CompilationResult result = |
4916 compiler.Assemble(macro_assembler, node, data->capture_count, pattern); | 4917 compiler.Assemble(macro_assembler, node, data->capture_count, pattern); |
4917 | 4918 |
4918 if (FLAG_trace_irregexp) { | 4919 if (FLAG_trace_irregexp) { |
4919 macro_assembler->PrintBlocks(); | 4920 macro_assembler->PrintBlocks(); |
4920 } | 4921 } |
4921 | 4922 |
4922 return result; | 4923 return result; |
4923 } | 4924 } |
4924 | 4925 |
4925 | 4926 |
4926 RegExpEngine::CompilationResult RegExpEngine::CompileBytecode( | 4927 RegExpEngine::CompilationResult RegExpEngine::CompileBytecode( |
4927 RegExpCompileData* data, | 4928 RegExpCompileData* data, |
4928 const RegExp& regexp, | 4929 const RegExp& regexp, |
4929 bool is_one_byte, | 4930 bool is_one_byte, |
4931 bool is_sticky, | |
4930 Zone* zone) { | 4932 Zone* zone) { |
4931 ASSERT(FLAG_interpret_irregexp); | 4933 ASSERT(FLAG_interpret_irregexp); |
4932 const String& pattern = String::Handle(zone, regexp.pattern()); | 4934 const String& pattern = String::Handle(zone, regexp.pattern()); |
4933 | 4935 |
4934 ASSERT(!regexp.IsNull()); | 4936 ASSERT(!regexp.IsNull()); |
4935 ASSERT(!pattern.IsNull()); | 4937 ASSERT(!pattern.IsNull()); |
4936 | 4938 |
4937 const bool ignore_case = regexp.is_ignore_case(); | 4939 const bool ignore_case = regexp.is_ignore_case(); |
4938 const bool is_global = regexp.is_global(); | 4940 const bool is_global = regexp.is_global(); |
4939 | 4941 |
4940 RegExpCompiler compiler(data->capture_count, ignore_case, is_one_byte); | 4942 RegExpCompiler compiler(data->capture_count, ignore_case, is_one_byte); |
4941 | 4943 |
4942 // TODO(zerny): Frequency sampling is currently disabled because of several | 4944 // TODO(zerny): Frequency sampling is currently disabled because of several |
4943 // issues. We do not want to store subject strings in the regexp object since | 4945 // issues. We do not want to store subject strings in the regexp object since |
4944 // they might be long and we should not prevent their garbage collection. | 4946 // they might be long and we should not prevent their garbage collection. |
4945 // Passing them to this function explicitly does not help, since we must | 4947 // Passing them to this function explicitly does not help, since we must |
4946 // generate exactly the same IR for both the unoptimizing and optimizing | 4948 // generate exactly the same IR for both the unoptimizing and optimizing |
4947 // pipelines (otherwise it gets confused when i.e. deopt id's differ). | 4949 // pipelines (otherwise it gets confused when i.e. deopt id's differ). |
4948 // An option would be to store sampling results in the regexp object, but | 4950 // An option would be to store sampling results in the regexp object, but |
4949 // I'm not sure the performance gains are relevant enough. | 4951 // I'm not sure the performance gains are relevant enough. |
4950 | 4952 |
4951 // Wrap the body of the regexp in capture #0. | 4953 // Wrap the body of the regexp in capture #0. |
4952 RegExpNode* captured_body = | 4954 RegExpNode* captured_body = |
4953 RegExpCapture::ToNode(data->tree, 0, &compiler, compiler.accept()); | 4955 RegExpCapture::ToNode(data->tree, 0, &compiler, compiler.accept()); |
4954 | 4956 |
4955 RegExpNode* node = captured_body; | 4957 RegExpNode* node = captured_body; |
4956 bool is_end_anchored = data->tree->IsAnchoredAtEnd(); | 4958 bool is_end_anchored = data->tree->IsAnchoredAtEnd(); |
4957 bool is_start_anchored = data->tree->IsAnchoredAtStart(); | 4959 bool is_start_anchored = data->tree->IsAnchoredAtStart(); |
4958 intptr_t max_length = data->tree->max_match(); | 4960 intptr_t max_length = data->tree->max_match(); |
4959 if (!is_start_anchored) { | 4961 if (!is_start_anchored && !is_sticky) { |
4960 // Add a .*? at the beginning, outside the body capture, unless | 4962 // Add a .*? at the beginning, outside the body capture, unless |
4961 // this expression is anchored at the beginning. | 4963 // this expression is anchored at the beginning. |
4962 RegExpNode* loop_node = RegExpQuantifier::ToNode( | 4964 RegExpNode* loop_node = RegExpQuantifier::ToNode( |
4963 0, RegExpTree::kInfinity, false, new (zone) RegExpCharacterClass('*'), | 4965 0, RegExpTree::kInfinity, false, new (zone) RegExpCharacterClass('*'), |
4964 &compiler, captured_body, data->contains_anchor); | 4966 &compiler, captured_body, data->contains_anchor); |
4965 | 4967 |
4966 if (data->contains_anchor) { | 4968 if (data->contains_anchor) { |
4967 // Unroll loop once, to take care of the case that might start | 4969 // Unroll loop once, to take care of the case that might start |
4968 // at the start of input. | 4970 // at the start of input. |
4969 ChoiceNode* first_step_node = new (zone) ChoiceNode(2, zone); | 4971 ChoiceNode* first_step_node = new (zone) ChoiceNode(2, zone); |
(...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
5022 } | 5024 } |
5023 | 5025 |
5024 return result; | 5026 return result; |
5025 } | 5027 } |
5026 | 5028 |
5027 | 5029 |
5028 static void CreateSpecializedFunction(Thread* thread, | 5030 static void CreateSpecializedFunction(Thread* thread, |
5029 Zone* zone, | 5031 Zone* zone, |
5030 const RegExp& regexp, | 5032 const RegExp& regexp, |
5031 intptr_t specialization_cid, | 5033 intptr_t specialization_cid, |
5034 bool sticky, | |
5032 const Object& owner) { | 5035 const Object& owner) { |
5033 const intptr_t kParamCount = RegExpMacroAssembler::kParamCount; | 5036 const intptr_t kParamCount = RegExpMacroAssembler::kParamCount; |
5034 | 5037 |
5035 Function& fn = | 5038 Function& fn = |
5036 Function::Handle(zone, Function::New(Symbols::ColonMatcher(), | 5039 Function::Handle(zone, Function::New(Symbols::ColonMatcher(), |
5037 RawFunction::kIrregexpFunction, | 5040 RawFunction::kIrregexpFunction, |
5038 true, // Static. | 5041 true, // Static. |
5039 false, // Not const. | 5042 false, // Not const. |
5040 false, // Not abstract. | 5043 false, // Not abstract. |
5041 false, // Not external. | 5044 false, // Not external. |
(...skipping 14 matching lines...) Expand all Loading... | |
5056 Object::dynamic_type()); | 5059 Object::dynamic_type()); |
5057 fn.SetParameterNameAt(RegExpMacroAssembler::kParamStringIndex, | 5060 fn.SetParameterNameAt(RegExpMacroAssembler::kParamStringIndex, |
5058 Symbols::string_param()); | 5061 Symbols::string_param()); |
5059 fn.SetParameterTypeAt(RegExpMacroAssembler::kParamStartOffsetIndex, | 5062 fn.SetParameterTypeAt(RegExpMacroAssembler::kParamStartOffsetIndex, |
5060 Object::dynamic_type()); | 5063 Object::dynamic_type()); |
5061 fn.SetParameterNameAt(RegExpMacroAssembler::kParamStartOffsetIndex, | 5064 fn.SetParameterNameAt(RegExpMacroAssembler::kParamStartOffsetIndex, |
5062 Symbols::start_index_param()); | 5065 Symbols::start_index_param()); |
5063 fn.set_result_type(Type::Handle(zone, Type::ArrayType())); | 5066 fn.set_result_type(Type::Handle(zone, Type::ArrayType())); |
5064 | 5067 |
5065 // Cache the result. | 5068 // Cache the result. |
5066 regexp.set_function(specialization_cid, fn); | 5069 regexp.set_function(specialization_cid, sticky, fn); |
5067 | 5070 |
5068 fn.SetRegExpData(regexp, specialization_cid); | 5071 fn.SetRegExpData(regexp, specialization_cid, sticky); |
5069 fn.set_is_debuggable(false); | 5072 fn.set_is_debuggable(false); |
5070 | 5073 |
5071 // The function is compiled lazily during the first call. | 5074 // The function is compiled lazily during the first call. |
5072 } | 5075 } |
5073 | 5076 |
5074 | 5077 |
5075 RawRegExp* RegExpEngine::CreateRegExp(Thread* thread, | 5078 RawRegExp* RegExpEngine::CreateRegExp(Thread* thread, |
5076 const String& pattern, | 5079 const String& pattern, |
5077 bool multi_line, | 5080 bool multi_line, |
5078 bool ignore_case) { | 5081 bool ignore_case) { |
(...skipping 12 matching lines...) Expand all Loading... | |
5091 // TODO(zerny): We might want to use normal string searching algorithms | 5094 // TODO(zerny): We might want to use normal string searching algorithms |
5092 // for simple patterns. | 5095 // for simple patterns. |
5093 regexp.set_is_complex(); | 5096 regexp.set_is_complex(); |
5094 regexp.set_is_global(); // All dart regexps are global. | 5097 regexp.set_is_global(); // All dart regexps are global. |
5095 | 5098 |
5096 if (!FLAG_interpret_irregexp) { | 5099 if (!FLAG_interpret_irregexp) { |
5097 const Library& lib = Library::Handle(zone, Library::CoreLibrary()); | 5100 const Library& lib = Library::Handle(zone, Library::CoreLibrary()); |
5098 const Class& owner = | 5101 const Class& owner = |
5099 Class::Handle(zone, lib.LookupClass(Symbols::RegExp())); | 5102 Class::Handle(zone, lib.LookupClass(Symbols::RegExp())); |
5100 | 5103 |
5101 CreateSpecializedFunction(thread, zone, regexp, kOneByteStringCid, owner); | 5104 for (intptr_t cid = kOneByteStringCid; cid <= kExternalTwoByteStringCid; |
5102 CreateSpecializedFunction(thread, zone, regexp, kTwoByteStringCid, owner); | 5105 cid++) { |
5103 CreateSpecializedFunction(thread, zone, regexp, kExternalOneByteStringCid, | 5106 CreateSpecializedFunction(thread, zone, regexp, cid, /*sticky=*/false, |
5104 owner); | 5107 owner); |
5105 CreateSpecializedFunction(thread, zone, regexp, kExternalTwoByteStringCid, | 5108 CreateSpecializedFunction(thread, zone, regexp, cid, /*sticky=*/true, |
5106 owner); | 5109 owner); |
5110 } | |
5107 } | 5111 } |
5108 | 5112 |
5109 return regexp.raw(); | 5113 return regexp.raw(); |
5110 } | 5114 } |
5111 | 5115 |
5112 | 5116 |
5113 } // namespace dart | 5117 } // namespace dart |
OLD | NEW |