Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(204)

Side by Side Diff: runtime/vm/regexp.cc

Issue 1201383002: Port irregexp bytecode compiler and interpreter from V8 r24065. (Closed) Base URL: git@github.com:dart-lang/sdk.git@master
Patch Set: Created 5 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 #include "vm/regexp.h" 5 #include "vm/regexp.h"
6 6
7 #include "vm/dart_entry.h" 7 #include "vm/dart_entry.h"
8 #include "vm/regexp_assembler.h" 8 #include "vm/regexp_assembler.h"
9 #include "vm/regexp_assembler_bytecode.h"
10 #include "vm/regexp_assembler_ir.h"
9 #include "vm/regexp_ast.h" 11 #include "vm/regexp_ast.h"
10 #include "vm/unibrow-inl.h" 12 #include "vm/unibrow-inl.h"
11 #include "vm/unicode.h" 13 #include "vm/unicode.h"
12 #include "vm/symbols.h" 14 #include "vm/symbols.h"
13 #include "vm/thread.h" 15 #include "vm/thread.h"
14 16
15 #define Z (zone()) 17 #define Z (zone())
16 18
17 namespace dart { 19 namespace dart {
18 20
19 DECLARE_FLAG(bool, trace_irregexp); 21 DECLARE_FLAG(bool, trace_irregexp);
22 DEFINE_FLAG(bool, interpret_irregexp, false,
23 "Use irregexp bytecode interpreter");
20 24
21 // Default to generating optimized regexp code. 25 // Default to generating optimized regexp code.
22 static const bool kRegexpOptimization = true; 26 static const bool kRegexpOptimization = true;
23 27
24 // More makes code generation slower, less makes V8 benchmark score lower. 28 // More makes code generation slower, less makes V8 benchmark score lower.
25 static const intptr_t kMaxLookaheadForBoyerMoore = 8; 29 static const intptr_t kMaxLookaheadForBoyerMoore = 8;
26 30
27 ContainedInLattice AddRange(ContainedInLattice containment, 31 ContainedInLattice AddRange(ContainedInLattice containment,
28 const intptr_t* ranges, 32 const intptr_t* ranges,
29 intptr_t ranges_length, 33 intptr_t ranges_length,
(...skipping 257 matching lines...) Expand 10 before | Expand all | Expand 10 after
287 private: 291 private:
288 CharacterFrequency frequencies_[RegExpMacroAssembler::kTableSize]; 292 CharacterFrequency frequencies_[RegExpMacroAssembler::kTableSize];
289 intptr_t total_samples_; 293 intptr_t total_samples_;
290 }; 294 };
291 295
292 296
293 class RegExpCompiler : public ValueObject { 297 class RegExpCompiler : public ValueObject {
294 public: 298 public:
295 RegExpCompiler(intptr_t capture_count, 299 RegExpCompiler(intptr_t capture_count,
296 bool ignore_case, 300 bool ignore_case,
297 intptr_t specialization_cid); 301 bool is_one_byte);
298 302
299 intptr_t AllocateRegister() { 303 intptr_t AllocateRegister() {
300 return next_register_++; 304 return next_register_++;
301 } 305 }
302 306
303 RegExpEngine::CompilationResult Assemble(IRRegExpMacroAssembler* assembler, 307 RegExpEngine::CompilationResult Assemble(
304 RegExpNode* start, 308 IRRegExpMacroAssembler* assembler,
305 intptr_t capture_count, 309 RegExpNode* start,
306 const String& pattern); 310 intptr_t capture_count,
311 const String& pattern);
312
313 RegExpEngine::CompilationResult Assemble(
314 BytecodeRegExpMacroAssembler* assembler,
315 RegExpNode* start,
316 intptr_t capture_count,
317 const String& pattern);
307 318
308 inline void AddWork(RegExpNode* node) { work_list_->Add(node); } 319 inline void AddWork(RegExpNode* node) { work_list_->Add(node); }
309 320
310 static const intptr_t kImplementationOffset = 0; 321 static const intptr_t kImplementationOffset = 0;
311 static const intptr_t kNumberOfRegistersOffset = 0; 322 static const intptr_t kNumberOfRegistersOffset = 0;
312 static const intptr_t kCodeOffset = 1; 323 static const intptr_t kCodeOffset = 1;
313 324
314 IRRegExpMacroAssembler* macro_assembler() { return macro_assembler_; } 325 RegExpMacroAssembler* macro_assembler() { return macro_assembler_; }
315 EndNode* accept() { return accept_; } 326 EndNode* accept() { return accept_; }
316 327
317 static const intptr_t kMaxRecursion = 100; 328 static const intptr_t kMaxRecursion = 100;
318 inline intptr_t recursion_depth() { return recursion_depth_; } 329 inline intptr_t recursion_depth() { return recursion_depth_; }
319 inline void IncrementRecursionDepth() { recursion_depth_++; } 330 inline void IncrementRecursionDepth() { recursion_depth_++; }
320 inline void DecrementRecursionDepth() { recursion_depth_--; } 331 inline void DecrementRecursionDepth() { recursion_depth_--; }
321 332
322 void SetRegExpTooBig() { reg_exp_too_big_ = true; } 333 void SetRegExpTooBig() { reg_exp_too_big_ = true; }
323 334
324 inline bool ignore_case() { return ignore_case_; } 335 inline bool ignore_case() { return ignore_case_; }
325 inline bool one_byte() const { 336 inline bool one_byte() const { return is_one_byte_; }
326 return (specialization_cid_ == kOneByteStringCid ||
327 specialization_cid_ == kExternalOneByteStringCid);
328 }
329 inline intptr_t specialization_cid() { return specialization_cid_; }
330 FrequencyCollator* frequency_collator() { return &frequency_collator_; } 337 FrequencyCollator* frequency_collator() { return &frequency_collator_; }
331 338
332 intptr_t current_expansion_factor() { return current_expansion_factor_; } 339 intptr_t current_expansion_factor() { return current_expansion_factor_; }
333 void set_current_expansion_factor(intptr_t value) { 340 void set_current_expansion_factor(intptr_t value) {
334 current_expansion_factor_ = value; 341 current_expansion_factor_ = value;
335 } 342 }
336 343
337 Zone* zone() const { return zone_; } 344 Zone* zone() const { return zone_; }
338 345
339 static const intptr_t kNoRegister = -1; 346 static const intptr_t kNoRegister = -1;
340 347
341 private: 348 private:
342 EndNode* accept_; 349 EndNode* accept_;
343 intptr_t next_register_; 350 intptr_t next_register_;
344 ZoneGrowableArray<RegExpNode*>* work_list_; 351 ZoneGrowableArray<RegExpNode*>* work_list_;
345 intptr_t recursion_depth_; 352 intptr_t recursion_depth_;
346 IRRegExpMacroAssembler* macro_assembler_; 353 RegExpMacroAssembler* macro_assembler_;
347 bool ignore_case_; 354 bool ignore_case_;
348 intptr_t specialization_cid_; 355 bool is_one_byte_;
349 bool reg_exp_too_big_; 356 bool reg_exp_too_big_;
350 intptr_t current_expansion_factor_; 357 intptr_t current_expansion_factor_;
351 FrequencyCollator frequency_collator_; 358 FrequencyCollator frequency_collator_;
352 Zone* zone_; 359 Zone* zone_;
353 }; 360 };
354 361
355 362
356 class RecursionCheck : public ValueObject { 363 class RecursionCheck : public ValueObject {
357 public: 364 public:
358 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) { 365 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) {
359 compiler->IncrementRecursionDepth(); 366 compiler->IncrementRecursionDepth();
360 } 367 }
361 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); } 368 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); }
362 private: 369 private:
363 RegExpCompiler* compiler_; 370 RegExpCompiler* compiler_;
364 }; 371 };
365 372
366 373
367 static RegExpEngine::CompilationResult IrregexpRegExpTooBig() { 374 static RegExpEngine::CompilationResult IrregexpRegExpTooBig() {
368 return RegExpEngine::CompilationResult("RegExp too big"); 375 return RegExpEngine::CompilationResult("RegExp too big");
369 } 376 }
370 377
371 378
372 // Attempts to compile the regexp using an Irregexp code generator. Returns 379 // Attempts to compile the regexp using an Irregexp code generator. Returns
373 // a fixed array or a null handle depending on whether it succeeded. 380 // a fixed array or a null handle depending on whether it succeeded.
374 RegExpCompiler::RegExpCompiler(intptr_t capture_count, bool ignore_case, 381 RegExpCompiler::RegExpCompiler(intptr_t capture_count,
375 intptr_t specialization_cid) 382 bool ignore_case,
383 bool is_one_byte)
376 : next_register_(2 * (capture_count + 1)), 384 : next_register_(2 * (capture_count + 1)),
377 work_list_(NULL), 385 work_list_(NULL),
378 recursion_depth_(0), 386 recursion_depth_(0),
379 ignore_case_(ignore_case), 387 ignore_case_(ignore_case),
380 specialization_cid_(specialization_cid), 388 is_one_byte_(is_one_byte),
381 reg_exp_too_big_(false), 389 reg_exp_too_big_(false),
382 current_expansion_factor_(1), 390 current_expansion_factor_(1),
383 zone_(Thread::Current()->zone()) { 391 zone_(Thread::Current()->zone()) {
384 accept_ = new(Z) EndNode(EndNode::ACCEPT, Z); 392 accept_ = new(Z) EndNode(EndNode::ACCEPT, Z);
385 } 393 }
386 394
387 395
388 RegExpEngine::CompilationResult RegExpCompiler::Assemble( 396 RegExpEngine::CompilationResult RegExpCompiler::Assemble(
389 IRRegExpMacroAssembler* macro_assembler, 397 IRRegExpMacroAssembler* macro_assembler,
390 RegExpNode* start, 398 RegExpNode* start,
(...skipping 16 matching lines...) Expand all
407 work_list.RemoveLast()->Emit(this, &new_trace); 415 work_list.RemoveLast()->Emit(this, &new_trace);
408 } 416 }
409 if (reg_exp_too_big_) return IrregexpRegExpTooBig(); 417 if (reg_exp_too_big_) return IrregexpRegExpTooBig();
410 418
411 macro_assembler->GenerateBacktrackBlock(); 419 macro_assembler->GenerateBacktrackBlock();
412 macro_assembler->FinalizeRegistersArray(); 420 macro_assembler->FinalizeRegistersArray();
413 421
414 return RegExpEngine::CompilationResult(macro_assembler->backtrack_goto(), 422 return RegExpEngine::CompilationResult(macro_assembler->backtrack_goto(),
415 macro_assembler->graph_entry(), 423 macro_assembler->graph_entry(),
416 macro_assembler->num_blocks(), 424 macro_assembler->num_blocks(),
417 macro_assembler->num_stack_locals()); 425 macro_assembler->num_stack_locals(),
426 next_register_);
418 } 427 }
419 428
420 429
430 RegExpEngine::CompilationResult RegExpCompiler::Assemble(
431 BytecodeRegExpMacroAssembler* macro_assembler,
432 RegExpNode* start,
433 intptr_t capture_count,
434 const String& pattern) {
435 static const bool use_slow_safe_regexp_compiler = false;
436
437 macro_assembler->set_slow_safe(use_slow_safe_regexp_compiler);
srdjan 2015/07/07 19:30:25 set_slow_safe(false /*use_slow_safe_regexp_compile
rmacnak 2015/07/07 21:42:53 Done.
438 macro_assembler_ = macro_assembler;
439
440 ZoneGrowableArray<RegExpNode*> work_list(0);
441 work_list_ = &work_list;
442 BlockLabel fail;
443 macro_assembler_->PushBacktrack(&fail);
444 Trace new_trace;
445 start->Emit(this, &new_trace);
446 macro_assembler_->BindBlock(&fail);
447 macro_assembler_->Fail();
448 while (!work_list.is_empty()) {
449 work_list.RemoveLast()->Emit(this, &new_trace);
450 }
451 if (reg_exp_too_big_) return IrregexpRegExpTooBig();
452
453 TypedData& bytecode = TypedData::ZoneHandle(macro_assembler->GetBytecode());
454 return RegExpEngine::CompilationResult(&bytecode, next_register_);
455 }
456
457
421 bool Trace::DeferredAction::Mentions(intptr_t that) { 458 bool Trace::DeferredAction::Mentions(intptr_t that) {
422 if (action_type() == ActionNode::CLEAR_CAPTURES) { 459 if (action_type() == ActionNode::CLEAR_CAPTURES) {
423 Interval range = static_cast<DeferredClearCaptures*>(this)->range(); 460 Interval range = static_cast<DeferredClearCaptures*>(this)->range();
424 return range.Contains(that); 461 return range.Contains(that);
425 } else { 462 } else {
426 return reg() == that; 463 return reg() == that;
427 } 464 }
428 } 465 }
429 466
430 467
(...skipping 4538 matching lines...) Expand 10 before | Expand all | Expand 10 after
4969 return; 5006 return;
4970 } 5007 }
4971 on_success()->FillInBMInfo(offset, 5008 on_success()->FillInBMInfo(offset,
4972 budget - 1, 5009 budget - 1,
4973 bm, 5010 bm,
4974 true); // Not at start after a text node. 5011 true); // Not at start after a text node.
4975 if (initial_offset == 0) set_bm_info(not_at_start, bm); 5012 if (initial_offset == 0) set_bm_info(not_at_start, bm);
4976 } 5013 }
4977 5014
4978 5015
4979 RegExpEngine::CompilationResult RegExpEngine::Compile( 5016 RegExpEngine::CompilationResult RegExpEngine::CompileIR(
4980 RegExpCompileData* data, 5017 RegExpCompileData* data,
4981 const ParsedFunction* parsed_function, 5018 const ParsedFunction* parsed_function,
4982 const ZoneGrowableArray<const ICData*>& ic_data_array) { 5019 const ZoneGrowableArray<const ICData*>& ic_data_array) {
5020 ASSERT(!FLAG_interpret_irregexp);
4983 Zone* zone = Thread::Current()->zone(); 5021 Zone* zone = Thread::Current()->zone();
4984 5022
4985 const Function& function = parsed_function->function(); 5023 const Function& function = parsed_function->function();
4986 const intptr_t specialization_cid = function.regexp_cid(); 5024 const intptr_t specialization_cid = function.regexp_cid();
4987 const bool is_one_byte = (specialization_cid == kOneByteStringCid || 5025 const bool is_one_byte = (specialization_cid == kOneByteStringCid ||
4988 specialization_cid == kExternalOneByteStringCid); 5026 specialization_cid == kExternalOneByteStringCid);
4989 JSRegExp& regexp = JSRegExp::Handle(zone, function.regexp()); 5027 JSRegExp& regexp = JSRegExp::Handle(zone, function.regexp());
4990 const String& pattern = String::Handle(zone, regexp.pattern()); 5028 const String& pattern = String::Handle(zone, regexp.pattern());
4991 5029
4992 ASSERT(!regexp.IsNull()); 5030 ASSERT(!regexp.IsNull());
4993 ASSERT(!pattern.IsNull()); 5031 ASSERT(!pattern.IsNull());
4994 5032
4995 const bool ignore_case = regexp.is_ignore_case(); 5033 const bool ignore_case = regexp.is_ignore_case();
4996 const bool is_global = regexp.is_global(); 5034 const bool is_global = regexp.is_global();
4997 5035
4998 RegExpCompiler compiler(data->capture_count, ignore_case, specialization_cid); 5036 RegExpCompiler compiler(data->capture_count, ignore_case, is_one_byte);
4999 5037
5000 // TODO(zerny): Frequency sampling is currently disabled because of several 5038 // TODO(zerny): Frequency sampling is currently disabled because of several
5001 // issues. We do not want to store subject strings in the regexp object since 5039 // issues. We do not want to store subject strings in the regexp object since
5002 // they might be long and we should not prevent their garbage collection. 5040 // they might be long and we should not prevent their garbage collection.
5003 // Passing them to this function explicitly does not help, since we must 5041 // Passing them to this function explicitly does not help, since we must
5004 // generate exactly the same IR for both the unoptimizing and optimizing 5042 // generate exactly the same IR for both the unoptimizing and optimizing
5005 // pipelines (otherwise it gets confused when i.e. deopt id's differ). 5043 // pipelines (otherwise it gets confused when i.e. deopt id's differ).
5006 // An option would be to store sampling results in the regexp object, but 5044 // An option would be to store sampling results in the regexp object, but
5007 // I'm not sure the performance gains are relevant enough. 5045 // I'm not sure the performance gains are relevant enough.
5008 5046
(...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after
5091 pattern); 5129 pattern);
5092 5130
5093 if (FLAG_trace_irregexp) { 5131 if (FLAG_trace_irregexp) {
5094 macro_assembler->PrintBlocks(); 5132 macro_assembler->PrintBlocks();
5095 } 5133 }
5096 5134
5097 return result; 5135 return result;
5098 } 5136 }
5099 5137
5100 5138
5139 RegExpEngine::CompilationResult RegExpEngine::CompileBytecode(
5140 RegExpCompileData* data,
5141 const JSRegExp& regexp,
5142 bool is_one_byte,
5143 Zone* zone) {
5144 ASSERT(FLAG_interpret_irregexp);
5145 const String& pattern = String::Handle(zone, regexp.pattern());
5146
5147 ASSERT(!regexp.IsNull());
5148 ASSERT(!pattern.IsNull());
5149
5150 const bool ignore_case = regexp.is_ignore_case();
5151 const bool is_global = regexp.is_global();
5152
5153 RegExpCompiler compiler(data->capture_count, ignore_case, is_one_byte);
5154
5155 // TODO(zerny): Frequency sampling is currently disabled because of several
5156 // issues. We do not want to store subject strings in the regexp object since
5157 // they might be long and we should not prevent their garbage collection.
5158 // Passing them to this function explicitly does not help, since we must
5159 // generate exactly the same IR for both the unoptimizing and optimizing
5160 // pipelines (otherwise it gets confused when i.e. deopt id's differ).
5161 // An option would be to store sampling results in the regexp object, but
5162 // I'm not sure the performance gains are relevant enough.
5163
5164 // Wrap the body of the regexp in capture #0.
5165 RegExpNode* captured_body = RegExpCapture::ToNode(data->tree,
5166 0,
5167 &compiler,
5168 compiler.accept());
5169
5170 RegExpNode* node = captured_body;
5171 bool is_end_anchored = data->tree->IsAnchoredAtEnd();
5172 bool is_start_anchored = data->tree->IsAnchoredAtStart();
5173 intptr_t max_length = data->tree->max_match();
5174 if (!is_start_anchored) {
5175 // Add a .*? at the beginning, outside the body capture, unless
5176 // this expression is anchored at the beginning.
5177 RegExpNode* loop_node =
5178 RegExpQuantifier::ToNode(0,
5179 RegExpTree::kInfinity,
5180 false,
5181 new(zone) RegExpCharacterClass('*'),
5182 &compiler,
5183 captured_body,
5184 data->contains_anchor);
5185
5186 if (data->contains_anchor) {
5187 // Unroll loop once, to take care of the case that might start
5188 // at the start of input.
5189 ChoiceNode* first_step_node = new(zone) ChoiceNode(2, zone);
5190 first_step_node->AddAlternative(GuardedAlternative(captured_body));
5191 first_step_node->AddAlternative(GuardedAlternative(
5192 new(zone) TextNode(
5193 new(zone) RegExpCharacterClass('*'), loop_node)));
5194 node = first_step_node;
5195 } else {
5196 node = loop_node;
5197 }
5198 }
5199 if (is_one_byte) {
5200 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case);
5201 // Do it again to propagate the new nodes to places where they were not
5202 // put because they had not been calculated yet.
5203 if (node != NULL) {
5204 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case);
5205 }
5206 }
5207
5208 if (node == NULL) node = new(zone) EndNode(EndNode::BACKTRACK, zone);
5209 data->node = node;
5210 Analysis analysis(ignore_case, is_one_byte);
5211 analysis.EnsureAnalyzed(node);
5212 if (analysis.has_failed()) {
5213 const char* error_message = analysis.error_message();
5214 return CompilationResult(error_message);
5215 }
5216
5217 // Bytecode regexp implementation.
5218
5219 ZoneGrowableArray<uint8_t> buffer(zone, 1024);
5220 BytecodeRegExpMacroAssembler* macro_assembler =
5221 new(zone) BytecodeRegExpMacroAssembler(&buffer, zone);
5222
5223 // Inserted here, instead of in Assembler, because it depends on information
5224 // in the AST that isn't replicated in the Node structure.
5225 static const intptr_t kMaxBacksearchLimit = 1024;
5226 if (is_end_anchored &&
5227 !is_start_anchored &&
5228 max_length < kMaxBacksearchLimit) {
5229 macro_assembler->SetCurrentPositionFromEnd(max_length);
5230 }
5231
5232 if (is_global) {
5233 macro_assembler->set_global_mode(
5234 (data->tree->min_match() > 0)
5235 ? RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK
5236 : RegExpMacroAssembler::GLOBAL);
5237 }
5238
5239 RegExpEngine::CompilationResult result =
5240 compiler.Assemble(macro_assembler,
5241 node,
5242 data->capture_count,
5243 pattern);
5244
5245 if (FLAG_trace_irregexp) {
5246 macro_assembler->PrintBlocks();
5247 }
5248
5249 return result;
5250 }
5251
5252
5101 static void CreateSpecializedFunction(Zone* zone, 5253 static void CreateSpecializedFunction(Zone* zone,
5102 const JSRegExp& regexp, 5254 const JSRegExp& regexp,
5103 intptr_t specialization_cid, 5255 intptr_t specialization_cid,
5104 const Object& owner) { 5256 const Object& owner) {
5105 const intptr_t kParamCount = RegExpMacroAssembler::kParamCount; 5257 const intptr_t kParamCount = RegExpMacroAssembler::kParamCount;
5106 5258
5107 Function& fn = Function::Handle(zone, Function::New( 5259 Function& fn = Function::Handle(zone, Function::New(
5108 // Append the regexp pattern to the function name. 5260 // Append the regexp pattern to the function name.
5109 String::Handle(zone, Symbols::New( 5261 String::Handle(zone, Symbols::New(
5110 String::Handle(zone, String::Concat( 5262 String::Handle(zone, String::Concat(
(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after
5171 CreateSpecializedFunction(zone, regexp, kOneByteStringCid, owner); 5323 CreateSpecializedFunction(zone, regexp, kOneByteStringCid, owner);
5172 CreateSpecializedFunction(zone, regexp, kTwoByteStringCid, owner); 5324 CreateSpecializedFunction(zone, regexp, kTwoByteStringCid, owner);
5173 CreateSpecializedFunction(zone, regexp, kExternalOneByteStringCid, owner); 5325 CreateSpecializedFunction(zone, regexp, kExternalOneByteStringCid, owner);
5174 CreateSpecializedFunction(zone, regexp, kExternalTwoByteStringCid, owner); 5326 CreateSpecializedFunction(zone, regexp, kExternalTwoByteStringCid, owner);
5175 5327
5176 return regexp.raw(); 5328 return regexp.raw();
5177 } 5329 }
5178 5330
5179 5331
5180 } // namespace dart 5332 } // namespace dart
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698