Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(297)

Side by Side Diff: runtime/vm/regexp.cc

Issue 2510783002: VM: Optimize RegExp.matchAsPrefix(...) by generating a sticky RegExp specialization. (Closed)
Patch Set: Done Created 4 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 #include "vm/regexp.h" 5 #include "vm/regexp.h"
6 6
7 #include "vm/dart_entry.h" 7 #include "vm/dart_entry.h"
8 #include "vm/regexp_assembler.h" 8 #include "vm/regexp_assembler.h"
9 #include "vm/regexp_assembler_bytecode.h" 9 #include "vm/regexp_assembler_bytecode.h"
10 #include "vm/regexp_assembler_ir.h" 10 #include "vm/regexp_assembler_ir.h"
(...skipping 4806 matching lines...) Expand 10 before | Expand all | Expand 10 after
4817 4817
4818 RegExpEngine::CompilationResult RegExpEngine::CompileIR( 4818 RegExpEngine::CompilationResult RegExpEngine::CompileIR(
4819 RegExpCompileData* data, 4819 RegExpCompileData* data,
4820 const ParsedFunction* parsed_function, 4820 const ParsedFunction* parsed_function,
4821 const ZoneGrowableArray<const ICData*>& ic_data_array) { 4821 const ZoneGrowableArray<const ICData*>& ic_data_array) {
4822 ASSERT(!FLAG_interpret_irregexp); 4822 ASSERT(!FLAG_interpret_irregexp);
4823 Zone* zone = Thread::Current()->zone(); 4823 Zone* zone = Thread::Current()->zone();
4824 4824
4825 const Function& function = parsed_function->function(); 4825 const Function& function = parsed_function->function();
4826 const intptr_t specialization_cid = function.string_specialization_cid(); 4826 const intptr_t specialization_cid = function.string_specialization_cid();
4827 const intptr_t is_sticky = function.is_sticky_specialization();
4827 const bool is_one_byte = (specialization_cid == kOneByteStringCid || 4828 const bool is_one_byte = (specialization_cid == kOneByteStringCid ||
4828 specialization_cid == kExternalOneByteStringCid); 4829 specialization_cid == kExternalOneByteStringCid);
4829 RegExp& regexp = RegExp::Handle(zone, function.regexp()); 4830 RegExp& regexp = RegExp::Handle(zone, function.regexp());
4830 const String& pattern = String::Handle(zone, regexp.pattern()); 4831 const String& pattern = String::Handle(zone, regexp.pattern());
4831 4832
4832 ASSERT(!regexp.IsNull()); 4833 ASSERT(!regexp.IsNull());
4833 ASSERT(!pattern.IsNull()); 4834 ASSERT(!pattern.IsNull());
4834 4835
4835 const bool ignore_case = regexp.is_ignore_case(); 4836 const bool ignore_case = regexp.is_ignore_case();
4836 const bool is_global = regexp.is_global(); 4837 const bool is_global = regexp.is_global();
4837 4838
4838 RegExpCompiler compiler(data->capture_count, ignore_case, is_one_byte); 4839 RegExpCompiler compiler(data->capture_count, ignore_case, is_one_byte);
4839 4840
4840 // TODO(zerny): Frequency sampling is currently disabled because of several 4841 // TODO(zerny): Frequency sampling is currently disabled because of several
4841 // issues. We do not want to store subject strings in the regexp object since 4842 // issues. We do not want to store subject strings in the regexp object since
4842 // they might be long and we should not prevent their garbage collection. 4843 // they might be long and we should not prevent their garbage collection.
4843 // Passing them to this function explicitly does not help, since we must 4844 // Passing them to this function explicitly does not help, since we must
4844 // generate exactly the same IR for both the unoptimizing and optimizing 4845 // generate exactly the same IR for both the unoptimizing and optimizing
4845 // pipelines (otherwise it gets confused when i.e. deopt id's differ). 4846 // pipelines (otherwise it gets confused when i.e. deopt id's differ).
4846 // An option would be to store sampling results in the regexp object, but 4847 // An option would be to store sampling results in the regexp object, but
4847 // I'm not sure the performance gains are relevant enough. 4848 // I'm not sure the performance gains are relevant enough.
4848 4849
4849 // Wrap the body of the regexp in capture #0. 4850 // Wrap the body of the regexp in capture #0.
4850 RegExpNode* captured_body = 4851 RegExpNode* captured_body =
4851 RegExpCapture::ToNode(data->tree, 0, &compiler, compiler.accept()); 4852 RegExpCapture::ToNode(data->tree, 0, &compiler, compiler.accept());
4852 4853
4853 RegExpNode* node = captured_body; 4854 RegExpNode* node = captured_body;
4854 bool is_end_anchored = data->tree->IsAnchoredAtEnd(); 4855 const bool is_end_anchored = data->tree->IsAnchoredAtEnd();
4855 bool is_start_anchored = data->tree->IsAnchoredAtStart(); 4856 const bool is_start_anchored = data->tree->IsAnchoredAtStart();
4856 intptr_t max_length = data->tree->max_match(); 4857 intptr_t max_length = data->tree->max_match();
4857 if (!is_start_anchored) { 4858 if (!is_start_anchored && !is_sticky) {
4858 // Add a .*? at the beginning, outside the body capture, unless 4859 // Add a .*? at the beginning, outside the body capture, unless
rmacnak 2016/11/17 00:49:52 Not sure I see why this property is called sticky,
erikcorry 2016/11/17 13:56:17 From es-discuss (!): I believe the y actually com
4859 // this expression is anchored at the beginning. 4860 // this expression is anchored at the beginning or is sticky.
4860 RegExpNode* loop_node = RegExpQuantifier::ToNode( 4861 RegExpNode* loop_node = RegExpQuantifier::ToNode(
4861 0, RegExpTree::kInfinity, false, new (zone) RegExpCharacterClass('*'), 4862 0, RegExpTree::kInfinity, false, new (zone) RegExpCharacterClass('*'),
4862 &compiler, captured_body, data->contains_anchor); 4863 &compiler, captured_body, data->contains_anchor);
4863 4864
4864 if (data->contains_anchor) { 4865 if (data->contains_anchor) {
4865 // Unroll loop once, to take care of the case that might start 4866 // Unroll loop once, to take care of the case that might start
4866 // at the start of input. 4867 // at the start of input.
4867 ChoiceNode* first_step_node = new (zone) ChoiceNode(2, zone); 4868 ChoiceNode* first_step_node = new (zone) ChoiceNode(2, zone);
4868 first_step_node->AddAlternative(GuardedAlternative(captured_body)); 4869 first_step_node->AddAlternative(GuardedAlternative(captured_body));
4869 first_step_node->AddAlternative(GuardedAlternative(new (zone) TextNode( 4870 first_step_node->AddAlternative(GuardedAlternative(new (zone) TextNode(
(...skipping 25 matching lines...) Expand all
4895 4896
4896 IRRegExpMacroAssembler* macro_assembler = 4897 IRRegExpMacroAssembler* macro_assembler =
4897 new (zone) IRRegExpMacroAssembler(specialization_cid, data->capture_count, 4898 new (zone) IRRegExpMacroAssembler(specialization_cid, data->capture_count,
4898 parsed_function, ic_data_array, zone); 4899 parsed_function, ic_data_array, zone);
4899 4900
4900 // Inserted here, instead of in Assembler, because it depends on information 4901 // Inserted here, instead of in Assembler, because it depends on information
4901 // in the AST that isn't replicated in the Node structure. 4902 // in the AST that isn't replicated in the Node structure.
4902 static const intptr_t kMaxBacksearchLimit = 1024; 4903 static const intptr_t kMaxBacksearchLimit = 1024;
4903 if (is_end_anchored && !is_start_anchored && 4904 if (is_end_anchored && !is_start_anchored &&
4904 max_length < kMaxBacksearchLimit) { 4905 max_length < kMaxBacksearchLimit) {
4905 macro_assembler->SetCurrentPositionFromEnd(max_length); 4906 macro_assembler->SetCurrentPositionFromEnd(max_length);
erikcorry 2016/11/17 13:56:17 This needs fixing here too and we need a test for
Vyacheslav Egorov (Google) 2016/11/17 16:51:33 As we discussed I have fixed it here and I will po
4906 } 4907 }
4907 4908
4908 if (is_global) { 4909 if (is_global) {
4909 macro_assembler->set_global_mode( 4910 macro_assembler->set_global_mode(
4910 (data->tree->min_match() > 0) 4911 (data->tree->min_match() > 0)
4911 ? RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK 4912 ? RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK
4912 : RegExpMacroAssembler::GLOBAL); 4913 : RegExpMacroAssembler::GLOBAL);
4913 } 4914 }
4914 4915
4915 RegExpEngine::CompilationResult result = 4916 RegExpEngine::CompilationResult result =
4916 compiler.Assemble(macro_assembler, node, data->capture_count, pattern); 4917 compiler.Assemble(macro_assembler, node, data->capture_count, pattern);
4917 4918
4918 if (FLAG_trace_irregexp) { 4919 if (FLAG_trace_irregexp) {
4919 macro_assembler->PrintBlocks(); 4920 macro_assembler->PrintBlocks();
4920 } 4921 }
4921 4922
4922 return result; 4923 return result;
4923 } 4924 }
4924 4925
4925 4926
4926 RegExpEngine::CompilationResult RegExpEngine::CompileBytecode( 4927 RegExpEngine::CompilationResult RegExpEngine::CompileBytecode(
4927 RegExpCompileData* data, 4928 RegExpCompileData* data,
4928 const RegExp& regexp, 4929 const RegExp& regexp,
4929 bool is_one_byte, 4930 bool is_one_byte,
4931 bool is_sticky,
4930 Zone* zone) { 4932 Zone* zone) {
4931 ASSERT(FLAG_interpret_irregexp); 4933 ASSERT(FLAG_interpret_irregexp);
4932 const String& pattern = String::Handle(zone, regexp.pattern()); 4934 const String& pattern = String::Handle(zone, regexp.pattern());
4933 4935
4934 ASSERT(!regexp.IsNull()); 4936 ASSERT(!regexp.IsNull());
4935 ASSERT(!pattern.IsNull()); 4937 ASSERT(!pattern.IsNull());
4936 4938
4937 const bool ignore_case = regexp.is_ignore_case(); 4939 const bool ignore_case = regexp.is_ignore_case();
4938 const bool is_global = regexp.is_global(); 4940 const bool is_global = regexp.is_global();
4939 4941
4940 RegExpCompiler compiler(data->capture_count, ignore_case, is_one_byte); 4942 RegExpCompiler compiler(data->capture_count, ignore_case, is_one_byte);
4941 4943
4942 // TODO(zerny): Frequency sampling is currently disabled because of several 4944 // TODO(zerny): Frequency sampling is currently disabled because of several
4943 // issues. We do not want to store subject strings in the regexp object since 4945 // issues. We do not want to store subject strings in the regexp object since
4944 // they might be long and we should not prevent their garbage collection. 4946 // they might be long and we should not prevent their garbage collection.
4945 // Passing them to this function explicitly does not help, since we must 4947 // Passing them to this function explicitly does not help, since we must
4946 // generate exactly the same IR for both the unoptimizing and optimizing 4948 // generate exactly the same IR for both the unoptimizing and optimizing
4947 // pipelines (otherwise it gets confused when i.e. deopt id's differ). 4949 // pipelines (otherwise it gets confused when i.e. deopt id's differ).
4948 // An option would be to store sampling results in the regexp object, but 4950 // An option would be to store sampling results in the regexp object, but
4949 // I'm not sure the performance gains are relevant enough. 4951 // I'm not sure the performance gains are relevant enough.
4950 4952
4951 // Wrap the body of the regexp in capture #0. 4953 // Wrap the body of the regexp in capture #0.
4952 RegExpNode* captured_body = 4954 RegExpNode* captured_body =
4953 RegExpCapture::ToNode(data->tree, 0, &compiler, compiler.accept()); 4955 RegExpCapture::ToNode(data->tree, 0, &compiler, compiler.accept());
4954 4956
4955 RegExpNode* node = captured_body; 4957 RegExpNode* node = captured_body;
4956 bool is_end_anchored = data->tree->IsAnchoredAtEnd(); 4958 bool is_end_anchored = data->tree->IsAnchoredAtEnd();
4957 bool is_start_anchored = data->tree->IsAnchoredAtStart(); 4959 bool is_start_anchored = data->tree->IsAnchoredAtStart();
4958 intptr_t max_length = data->tree->max_match(); 4960 intptr_t max_length = data->tree->max_match();
4959 if (!is_start_anchored) { 4961 if (!is_start_anchored && !is_sticky) {
4960 // Add a .*? at the beginning, outside the body capture, unless 4962 // Add a .*? at the beginning, outside the body capture, unless
4961 // this expression is anchored at the beginning. 4963 // this expression is anchored at the beginning.
4962 RegExpNode* loop_node = RegExpQuantifier::ToNode( 4964 RegExpNode* loop_node = RegExpQuantifier::ToNode(
4963 0, RegExpTree::kInfinity, false, new (zone) RegExpCharacterClass('*'), 4965 0, RegExpTree::kInfinity, false, new (zone) RegExpCharacterClass('*'),
4964 &compiler, captured_body, data->contains_anchor); 4966 &compiler, captured_body, data->contains_anchor);
4965 4967
4966 if (data->contains_anchor) { 4968 if (data->contains_anchor) {
4967 // Unroll loop once, to take care of the case that might start 4969 // Unroll loop once, to take care of the case that might start
4968 // at the start of input. 4970 // at the start of input.
4969 ChoiceNode* first_step_node = new (zone) ChoiceNode(2, zone); 4971 ChoiceNode* first_step_node = new (zone) ChoiceNode(2, zone);
(...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after
5022 } 5024 }
5023 5025
5024 return result; 5026 return result;
5025 } 5027 }
5026 5028
5027 5029
5028 static void CreateSpecializedFunction(Thread* thread, 5030 static void CreateSpecializedFunction(Thread* thread,
5029 Zone* zone, 5031 Zone* zone,
5030 const RegExp& regexp, 5032 const RegExp& regexp,
5031 intptr_t specialization_cid, 5033 intptr_t specialization_cid,
5034 bool sticky,
5032 const Object& owner) { 5035 const Object& owner) {
5033 const intptr_t kParamCount = RegExpMacroAssembler::kParamCount; 5036 const intptr_t kParamCount = RegExpMacroAssembler::kParamCount;
5034 5037
5035 Function& fn = 5038 Function& fn =
5036 Function::Handle(zone, Function::New(Symbols::ColonMatcher(), 5039 Function::Handle(zone, Function::New(Symbols::ColonMatcher(),
5037 RawFunction::kIrregexpFunction, 5040 RawFunction::kIrregexpFunction,
5038 true, // Static. 5041 true, // Static.
5039 false, // Not const. 5042 false, // Not const.
5040 false, // Not abstract. 5043 false, // Not abstract.
5041 false, // Not external. 5044 false, // Not external.
(...skipping 14 matching lines...) Expand all
5056 Object::dynamic_type()); 5059 Object::dynamic_type());
5057 fn.SetParameterNameAt(RegExpMacroAssembler::kParamStringIndex, 5060 fn.SetParameterNameAt(RegExpMacroAssembler::kParamStringIndex,
5058 Symbols::string_param()); 5061 Symbols::string_param());
5059 fn.SetParameterTypeAt(RegExpMacroAssembler::kParamStartOffsetIndex, 5062 fn.SetParameterTypeAt(RegExpMacroAssembler::kParamStartOffsetIndex,
5060 Object::dynamic_type()); 5063 Object::dynamic_type());
5061 fn.SetParameterNameAt(RegExpMacroAssembler::kParamStartOffsetIndex, 5064 fn.SetParameterNameAt(RegExpMacroAssembler::kParamStartOffsetIndex,
5062 Symbols::start_index_param()); 5065 Symbols::start_index_param());
5063 fn.set_result_type(Type::Handle(zone, Type::ArrayType())); 5066 fn.set_result_type(Type::Handle(zone, Type::ArrayType()));
5064 5067
5065 // Cache the result. 5068 // Cache the result.
5066 regexp.set_function(specialization_cid, fn); 5069 regexp.set_function(specialization_cid, sticky, fn);
5067 5070
5068 fn.SetRegExpData(regexp, specialization_cid); 5071 fn.SetRegExpData(regexp, specialization_cid, sticky);
5069 fn.set_is_debuggable(false); 5072 fn.set_is_debuggable(false);
5070 5073
5071 // The function is compiled lazily during the first call. 5074 // The function is compiled lazily during the first call.
5072 } 5075 }
5073 5076
5074 5077
5075 RawRegExp* RegExpEngine::CreateRegExp(Thread* thread, 5078 RawRegExp* RegExpEngine::CreateRegExp(Thread* thread,
5076 const String& pattern, 5079 const String& pattern,
5077 bool multi_line, 5080 bool multi_line,
5078 bool ignore_case) { 5081 bool ignore_case) {
(...skipping 12 matching lines...) Expand all
5091 // TODO(zerny): We might want to use normal string searching algorithms 5094 // TODO(zerny): We might want to use normal string searching algorithms
5092 // for simple patterns. 5095 // for simple patterns.
5093 regexp.set_is_complex(); 5096 regexp.set_is_complex();
5094 regexp.set_is_global(); // All dart regexps are global. 5097 regexp.set_is_global(); // All dart regexps are global.
5095 5098
5096 if (!FLAG_interpret_irregexp) { 5099 if (!FLAG_interpret_irregexp) {
5097 const Library& lib = Library::Handle(zone, Library::CoreLibrary()); 5100 const Library& lib = Library::Handle(zone, Library::CoreLibrary());
5098 const Class& owner = 5101 const Class& owner =
5099 Class::Handle(zone, lib.LookupClass(Symbols::RegExp())); 5102 Class::Handle(zone, lib.LookupClass(Symbols::RegExp()));
5100 5103
5101 CreateSpecializedFunction(thread, zone, regexp, kOneByteStringCid, owner); 5104 for (intptr_t cid = kOneByteStringCid; cid <= kExternalTwoByteStringCid;
5102 CreateSpecializedFunction(thread, zone, regexp, kTwoByteStringCid, owner); 5105 cid++) {
5103 CreateSpecializedFunction(thread, zone, regexp, kExternalOneByteStringCid, 5106 CreateSpecializedFunction(thread, zone, regexp, cid, /*sticky=*/false,
5104 owner); 5107 owner);
5105 CreateSpecializedFunction(thread, zone, regexp, kExternalTwoByteStringCid, 5108 CreateSpecializedFunction(thread, zone, regexp, cid, /*sticky=*/true,
5106 owner); 5109 owner);
5110 }
5107 } 5111 }
5108 5112
5109 return regexp.raw(); 5113 return regexp.raw();
5110 } 5114 }
5111 5115
5112 5116
5113 } // namespace dart 5117 } // namespace dart
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698