OLD | NEW |
1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 #include "vm/regexp.h" | 5 #include "vm/regexp.h" |
6 | 6 |
7 #include "vm/dart_entry.h" | 7 #include "vm/dart_entry.h" |
8 #include "vm/regexp_assembler.h" | 8 #include "vm/regexp_assembler.h" |
9 #include "vm/regexp_assembler_bytecode.h" | 9 #include "vm/regexp_assembler_bytecode.h" |
10 #include "vm/regexp_assembler_ir.h" | 10 #include "vm/regexp_assembler_ir.h" |
11 #include "vm/regexp_ast.h" | 11 #include "vm/regexp_ast.h" |
| 12 #include "vm/symbols.h" |
| 13 #include "vm/thread.h" |
12 #include "vm/unibrow-inl.h" | 14 #include "vm/unibrow-inl.h" |
13 #include "vm/unicode.h" | 15 #include "vm/unicode.h" |
14 #include "vm/symbols.h" | |
15 #include "vm/thread.h" | |
16 | 16 |
17 #define Z (zone()) | 17 #define Z (zone()) |
18 | 18 |
19 namespace dart { | 19 namespace dart { |
20 | 20 |
21 // Default to generating optimized regexp code. | 21 // Default to generating optimized regexp code. |
22 static const bool kRegexpOptimization = true; | 22 static const bool kRegexpOptimization = true; |
23 | 23 |
24 // More makes code generation slower, less makes V8 benchmark score lower. | 24 // More makes code generation slower, less makes V8 benchmark score lower. |
25 static const intptr_t kMaxLookaheadForBoyerMoore = 8; | 25 static const intptr_t kMaxLookaheadForBoyerMoore = 8; |
(...skipping 163 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
189 // string. Therefore code generated for a non-trivial trace is specialized | 189 // string. Therefore code generated for a non-trivial trace is specialized |
190 // to that trace. The code generator therefore has the ability to generate | 190 // to that trace. The code generator therefore has the ability to generate |
191 // code for each node several times. In order to limit the size of the | 191 // code for each node several times. In order to limit the size of the |
192 // generated code there is an arbitrary limit on how many specialized sets of | 192 // generated code there is an arbitrary limit on how many specialized sets of |
193 // code may be generated for a given node. If the limit is reached, the | 193 // code may be generated for a given node. If the limit is reached, the |
194 // trace is flushed and a generic version of the code for a node is emitted. | 194 // trace is flushed and a generic version of the code for a node is emitted. |
195 // This is subsequently used for that node. The code emitted for non-generic | 195 // This is subsequently used for that node. The code emitted for non-generic |
196 // trace is not recorded in the node and so it cannot currently be reused in | 196 // trace is not recorded in the node and so it cannot currently be reused in |
197 // the event that code generation is requested for an identical trace. | 197 // the event that code generation is requested for an identical trace. |
198 | 198 |
199 | |
200 void RegExpTree::AppendToText(RegExpText* text) { | 199 void RegExpTree::AppendToText(RegExpText* text) { |
201 UNREACHABLE(); | 200 UNREACHABLE(); |
202 } | 201 } |
203 | 202 |
204 | |
205 void RegExpAtom::AppendToText(RegExpText* text) { | 203 void RegExpAtom::AppendToText(RegExpText* text) { |
206 text->AddElement(TextElement::Atom(this)); | 204 text->AddElement(TextElement::Atom(this)); |
207 } | 205 } |
208 | 206 |
209 | |
210 void RegExpCharacterClass::AppendToText(RegExpText* text) { | 207 void RegExpCharacterClass::AppendToText(RegExpText* text) { |
211 text->AddElement(TextElement::CharClass(this)); | 208 text->AddElement(TextElement::CharClass(this)); |
212 } | 209 } |
213 | 210 |
214 | |
215 void RegExpText::AppendToText(RegExpText* text) { | 211 void RegExpText::AppendToText(RegExpText* text) { |
216 for (intptr_t i = 0; i < elements()->length(); i++) | 212 for (intptr_t i = 0; i < elements()->length(); i++) |
217 text->AddElement((*elements())[i]); | 213 text->AddElement((*elements())[i]); |
218 } | 214 } |
219 | 215 |
220 | |
221 TextElement TextElement::Atom(RegExpAtom* atom) { | 216 TextElement TextElement::Atom(RegExpAtom* atom) { |
222 return TextElement(ATOM, atom); | 217 return TextElement(ATOM, atom); |
223 } | 218 } |
224 | 219 |
225 | |
226 TextElement TextElement::CharClass(RegExpCharacterClass* char_class) { | 220 TextElement TextElement::CharClass(RegExpCharacterClass* char_class) { |
227 return TextElement(CHAR_CLASS, char_class); | 221 return TextElement(CHAR_CLASS, char_class); |
228 } | 222 } |
229 | 223 |
230 | |
231 intptr_t TextElement::length() const { | 224 intptr_t TextElement::length() const { |
232 switch (text_type()) { | 225 switch (text_type()) { |
233 case ATOM: | 226 case ATOM: |
234 return atom()->length(); | 227 return atom()->length(); |
235 | 228 |
236 case CHAR_CLASS: | 229 case CHAR_CLASS: |
237 return 1; | 230 return 1; |
238 } | 231 } |
239 UNREACHABLE(); | 232 UNREACHABLE(); |
240 return 0; | 233 return 0; |
241 } | 234 } |
242 | 235 |
243 | |
244 class FrequencyCollator : public ValueObject { | 236 class FrequencyCollator : public ValueObject { |
245 public: | 237 public: |
246 FrequencyCollator() : total_samples_(0) { | 238 FrequencyCollator() : total_samples_(0) { |
247 for (intptr_t i = 0; i < RegExpMacroAssembler::kTableSize; i++) { | 239 for (intptr_t i = 0; i < RegExpMacroAssembler::kTableSize; i++) { |
248 frequencies_[i] = CharacterFrequency(i); | 240 frequencies_[i] = CharacterFrequency(i); |
249 } | 241 } |
250 } | 242 } |
251 | 243 |
252 void CountCharacter(intptr_t character) { | 244 void CountCharacter(intptr_t character) { |
253 intptr_t index = (character & RegExpMacroAssembler::kTableMask); | 245 intptr_t index = (character & RegExpMacroAssembler::kTableMask); |
(...skipping 22 matching lines...) Expand all Loading... |
276 intptr_t counter() { return counter_; } | 268 intptr_t counter() { return counter_; } |
277 intptr_t character() { return character_; } | 269 intptr_t character() { return character_; } |
278 | 270 |
279 private: | 271 private: |
280 intptr_t counter_; | 272 intptr_t counter_; |
281 intptr_t character_; | 273 intptr_t character_; |
282 | 274 |
283 DISALLOW_ALLOCATION(); | 275 DISALLOW_ALLOCATION(); |
284 }; | 276 }; |
285 | 277 |
286 | |
287 private: | 278 private: |
288 CharacterFrequency frequencies_[RegExpMacroAssembler::kTableSize]; | 279 CharacterFrequency frequencies_[RegExpMacroAssembler::kTableSize]; |
289 intptr_t total_samples_; | 280 intptr_t total_samples_; |
290 }; | 281 }; |
291 | 282 |
292 | |
293 class RegExpCompiler : public ValueObject { | 283 class RegExpCompiler : public ValueObject { |
294 public: | 284 public: |
295 RegExpCompiler(intptr_t capture_count, bool ignore_case, bool is_one_byte); | 285 RegExpCompiler(intptr_t capture_count, bool ignore_case, bool is_one_byte); |
296 | 286 |
297 intptr_t AllocateRegister() { return next_register_++; } | 287 intptr_t AllocateRegister() { return next_register_++; } |
298 | 288 |
299 #if !defined(DART_PRECOMPILED_RUNTIME) | 289 #if !defined(DART_PRECOMPILED_RUNTIME) |
300 RegExpEngine::CompilationResult Assemble(IRRegExpMacroAssembler* assembler, | 290 RegExpEngine::CompilationResult Assemble(IRRegExpMacroAssembler* assembler, |
301 RegExpNode* start, | 291 RegExpNode* start, |
302 intptr_t capture_count, | 292 intptr_t capture_count, |
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
345 intptr_t recursion_depth_; | 335 intptr_t recursion_depth_; |
346 RegExpMacroAssembler* macro_assembler_; | 336 RegExpMacroAssembler* macro_assembler_; |
347 bool ignore_case_; | 337 bool ignore_case_; |
348 bool is_one_byte_; | 338 bool is_one_byte_; |
349 bool reg_exp_too_big_; | 339 bool reg_exp_too_big_; |
350 intptr_t current_expansion_factor_; | 340 intptr_t current_expansion_factor_; |
351 FrequencyCollator frequency_collator_; | 341 FrequencyCollator frequency_collator_; |
352 Zone* zone_; | 342 Zone* zone_; |
353 }; | 343 }; |
354 | 344 |
355 | |
356 class RecursionCheck : public ValueObject { | 345 class RecursionCheck : public ValueObject { |
357 public: | 346 public: |
358 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) { | 347 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) { |
359 compiler->IncrementRecursionDepth(); | 348 compiler->IncrementRecursionDepth(); |
360 } | 349 } |
361 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); } | 350 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); } |
362 | 351 |
363 private: | 352 private: |
364 RegExpCompiler* compiler_; | 353 RegExpCompiler* compiler_; |
365 }; | 354 }; |
366 | 355 |
367 | |
368 static RegExpEngine::CompilationResult IrregexpRegExpTooBig() { | 356 static RegExpEngine::CompilationResult IrregexpRegExpTooBig() { |
369 return RegExpEngine::CompilationResult("RegExp too big"); | 357 return RegExpEngine::CompilationResult("RegExp too big"); |
370 } | 358 } |
371 | 359 |
372 | |
373 // Attempts to compile the regexp using an Irregexp code generator. Returns | 360 // Attempts to compile the regexp using an Irregexp code generator. Returns |
374 // a fixed array or a null handle depending on whether it succeeded. | 361 // a fixed array or a null handle depending on whether it succeeded. |
375 RegExpCompiler::RegExpCompiler(intptr_t capture_count, | 362 RegExpCompiler::RegExpCompiler(intptr_t capture_count, |
376 bool ignore_case, | 363 bool ignore_case, |
377 bool is_one_byte) | 364 bool is_one_byte) |
378 : next_register_(2 * (capture_count + 1)), | 365 : next_register_(2 * (capture_count + 1)), |
379 work_list_(NULL), | 366 work_list_(NULL), |
380 recursion_depth_(0), | 367 recursion_depth_(0), |
381 ignore_case_(ignore_case), | 368 ignore_case_(ignore_case), |
382 is_one_byte_(is_one_byte), | 369 is_one_byte_(is_one_byte), |
383 reg_exp_too_big_(false), | 370 reg_exp_too_big_(false), |
384 current_expansion_factor_(1), | 371 current_expansion_factor_(1), |
385 zone_(Thread::Current()->zone()) { | 372 zone_(Thread::Current()->zone()) { |
386 accept_ = new (Z) EndNode(EndNode::ACCEPT, Z); | 373 accept_ = new (Z) EndNode(EndNode::ACCEPT, Z); |
387 } | 374 } |
388 | 375 |
389 | |
390 #if !defined(DART_PRECOMPILED_RUNTIME) | 376 #if !defined(DART_PRECOMPILED_RUNTIME) |
391 RegExpEngine::CompilationResult RegExpCompiler::Assemble( | 377 RegExpEngine::CompilationResult RegExpCompiler::Assemble( |
392 IRRegExpMacroAssembler* macro_assembler, | 378 IRRegExpMacroAssembler* macro_assembler, |
393 RegExpNode* start, | 379 RegExpNode* start, |
394 intptr_t capture_count, | 380 intptr_t capture_count, |
395 const String& pattern) { | 381 const String& pattern) { |
396 macro_assembler->set_slow_safe(false /* use_slow_safe_regexp_compiler */); | 382 macro_assembler->set_slow_safe(false /* use_slow_safe_regexp_compiler */); |
397 macro_assembler_ = macro_assembler; | 383 macro_assembler_ = macro_assembler; |
398 | 384 |
399 ZoneGrowableArray<RegExpNode*> work_list(0); | 385 ZoneGrowableArray<RegExpNode*> work_list(0); |
(...skipping 12 matching lines...) Expand all Loading... |
412 macro_assembler->GenerateBacktrackBlock(); | 398 macro_assembler->GenerateBacktrackBlock(); |
413 macro_assembler->FinalizeRegistersArray(); | 399 macro_assembler->FinalizeRegistersArray(); |
414 | 400 |
415 return RegExpEngine::CompilationResult( | 401 return RegExpEngine::CompilationResult( |
416 macro_assembler->backtrack_goto(), macro_assembler->graph_entry(), | 402 macro_assembler->backtrack_goto(), macro_assembler->graph_entry(), |
417 macro_assembler->num_blocks(), macro_assembler->num_stack_locals(), | 403 macro_assembler->num_blocks(), macro_assembler->num_stack_locals(), |
418 next_register_); | 404 next_register_); |
419 } | 405 } |
420 #endif | 406 #endif |
421 | 407 |
422 | |
423 RegExpEngine::CompilationResult RegExpCompiler::Assemble( | 408 RegExpEngine::CompilationResult RegExpCompiler::Assemble( |
424 BytecodeRegExpMacroAssembler* macro_assembler, | 409 BytecodeRegExpMacroAssembler* macro_assembler, |
425 RegExpNode* start, | 410 RegExpNode* start, |
426 intptr_t capture_count, | 411 intptr_t capture_count, |
427 const String& pattern) { | 412 const String& pattern) { |
428 macro_assembler->set_slow_safe(false /* use_slow_safe_regexp_compiler */); | 413 macro_assembler->set_slow_safe(false /* use_slow_safe_regexp_compiler */); |
429 macro_assembler_ = macro_assembler; | 414 macro_assembler_ = macro_assembler; |
430 | 415 |
431 ZoneGrowableArray<RegExpNode*> work_list(0); | 416 ZoneGrowableArray<RegExpNode*> work_list(0); |
432 work_list_ = &work_list; | 417 work_list_ = &work_list; |
433 BlockLabel fail; | 418 BlockLabel fail; |
434 macro_assembler_->PushBacktrack(&fail); | 419 macro_assembler_->PushBacktrack(&fail); |
435 Trace new_trace; | 420 Trace new_trace; |
436 start->Emit(this, &new_trace); | 421 start->Emit(this, &new_trace); |
437 macro_assembler_->BindBlock(&fail); | 422 macro_assembler_->BindBlock(&fail); |
438 macro_assembler_->Fail(); | 423 macro_assembler_->Fail(); |
439 while (!work_list.is_empty()) { | 424 while (!work_list.is_empty()) { |
440 work_list.RemoveLast()->Emit(this, &new_trace); | 425 work_list.RemoveLast()->Emit(this, &new_trace); |
441 } | 426 } |
442 if (reg_exp_too_big_) return IrregexpRegExpTooBig(); | 427 if (reg_exp_too_big_) return IrregexpRegExpTooBig(); |
443 | 428 |
444 TypedData& bytecode = TypedData::ZoneHandle(macro_assembler->GetBytecode()); | 429 TypedData& bytecode = TypedData::ZoneHandle(macro_assembler->GetBytecode()); |
445 return RegExpEngine::CompilationResult(&bytecode, next_register_); | 430 return RegExpEngine::CompilationResult(&bytecode, next_register_); |
446 } | 431 } |
447 | 432 |
448 | |
449 bool Trace::DeferredAction::Mentions(intptr_t that) { | 433 bool Trace::DeferredAction::Mentions(intptr_t that) { |
450 if (action_type() == ActionNode::CLEAR_CAPTURES) { | 434 if (action_type() == ActionNode::CLEAR_CAPTURES) { |
451 Interval range = static_cast<DeferredClearCaptures*>(this)->range(); | 435 Interval range = static_cast<DeferredClearCaptures*>(this)->range(); |
452 return range.Contains(that); | 436 return range.Contains(that); |
453 } else { | 437 } else { |
454 return reg() == that; | 438 return reg() == that; |
455 } | 439 } |
456 } | 440 } |
457 | 441 |
458 | |
459 bool Trace::mentions_reg(intptr_t reg) { | 442 bool Trace::mentions_reg(intptr_t reg) { |
460 for (DeferredAction* action = actions_; action != NULL; | 443 for (DeferredAction* action = actions_; action != NULL; |
461 action = action->next()) { | 444 action = action->next()) { |
462 if (action->Mentions(reg)) return true; | 445 if (action->Mentions(reg)) return true; |
463 } | 446 } |
464 return false; | 447 return false; |
465 } | 448 } |
466 | 449 |
467 | |
468 bool Trace::GetStoredPosition(intptr_t reg, intptr_t* cp_offset) { | 450 bool Trace::GetStoredPosition(intptr_t reg, intptr_t* cp_offset) { |
469 ASSERT(*cp_offset == 0); | 451 ASSERT(*cp_offset == 0); |
470 for (DeferredAction* action = actions_; action != NULL; | 452 for (DeferredAction* action = actions_; action != NULL; |
471 action = action->next()) { | 453 action = action->next()) { |
472 if (action->Mentions(reg)) { | 454 if (action->Mentions(reg)) { |
473 if (action->action_type() == ActionNode::STORE_POSITION) { | 455 if (action->action_type() == ActionNode::STORE_POSITION) { |
474 *cp_offset = static_cast<DeferredCapture*>(action)->cp_offset(); | 456 *cp_offset = static_cast<DeferredCapture*>(action)->cp_offset(); |
475 return true; | 457 return true; |
476 } else { | 458 } else { |
477 return false; | 459 return false; |
478 } | 460 } |
479 } | 461 } |
480 } | 462 } |
481 return false; | 463 return false; |
482 } | 464 } |
483 | 465 |
484 | |
485 // This is called as we come into a loop choice node and some other tricky | 466 // This is called as we come into a loop choice node and some other tricky |
486 // nodes. It normalizes the state of the code generator to ensure we can | 467 // nodes. It normalizes the state of the code generator to ensure we can |
487 // generate generic code. | 468 // generate generic code. |
488 intptr_t Trace::FindAffectedRegisters(OutSet* affected_registers, Zone* zone) { | 469 intptr_t Trace::FindAffectedRegisters(OutSet* affected_registers, Zone* zone) { |
489 intptr_t max_register = RegExpCompiler::kNoRegister; | 470 intptr_t max_register = RegExpCompiler::kNoRegister; |
490 for (DeferredAction* action = actions_; action != NULL; | 471 for (DeferredAction* action = actions_; action != NULL; |
491 action = action->next()) { | 472 action = action->next()) { |
492 if (action->action_type() == ActionNode::CLEAR_CAPTURES) { | 473 if (action->action_type() == ActionNode::CLEAR_CAPTURES) { |
493 Interval range = static_cast<DeferredClearCaptures*>(action)->range(); | 474 Interval range = static_cast<DeferredClearCaptures*>(action)->range(); |
494 for (intptr_t i = range.from(); i <= range.to(); i++) | 475 for (intptr_t i = range.from(); i <= range.to(); i++) |
495 affected_registers->Set(i, zone); | 476 affected_registers->Set(i, zone); |
496 if (range.to() > max_register) max_register = range.to(); | 477 if (range.to() > max_register) max_register = range.to(); |
497 } else { | 478 } else { |
498 affected_registers->Set(action->reg(), zone); | 479 affected_registers->Set(action->reg(), zone); |
499 if (action->reg() > max_register) max_register = action->reg(); | 480 if (action->reg() > max_register) max_register = action->reg(); |
500 } | 481 } |
501 } | 482 } |
502 return max_register; | 483 return max_register; |
503 } | 484 } |
504 | 485 |
505 | |
506 void Trace::RestoreAffectedRegisters(RegExpMacroAssembler* assembler, | 486 void Trace::RestoreAffectedRegisters(RegExpMacroAssembler* assembler, |
507 intptr_t max_register, | 487 intptr_t max_register, |
508 const OutSet& registers_to_pop, | 488 const OutSet& registers_to_pop, |
509 const OutSet& registers_to_clear) { | 489 const OutSet& registers_to_clear) { |
510 for (intptr_t reg = max_register; reg >= 0; reg--) { | 490 for (intptr_t reg = max_register; reg >= 0; reg--) { |
511 if (registers_to_pop.Get(reg)) { | 491 if (registers_to_pop.Get(reg)) { |
512 assembler->PopRegister(reg); | 492 assembler->PopRegister(reg); |
513 } else if (registers_to_clear.Get(reg)) { | 493 } else if (registers_to_clear.Get(reg)) { |
514 intptr_t clear_to = reg; | 494 intptr_t clear_to = reg; |
515 while (reg > 0 && registers_to_clear.Get(reg - 1)) { | 495 while (reg > 0 && registers_to_clear.Get(reg - 1)) { |
516 reg--; | 496 reg--; |
517 } | 497 } |
518 assembler->ClearRegisters(reg, clear_to); | 498 assembler->ClearRegisters(reg, clear_to); |
519 } | 499 } |
520 } | 500 } |
521 } | 501 } |
522 | 502 |
523 | |
524 void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler, | 503 void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler, |
525 intptr_t max_register, | 504 intptr_t max_register, |
526 const OutSet& affected_registers, | 505 const OutSet& affected_registers, |
527 OutSet* registers_to_pop, | 506 OutSet* registers_to_pop, |
528 OutSet* registers_to_clear, | 507 OutSet* registers_to_clear, |
529 Zone* zone) { | 508 Zone* zone) { |
530 for (intptr_t reg = 0; reg <= max_register; reg++) { | 509 for (intptr_t reg = 0; reg <= max_register; reg++) { |
531 if (!affected_registers.Get(reg)) { | 510 if (!affected_registers.Get(reg)) { |
532 continue; | 511 continue; |
533 } | 512 } |
(...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
628 } else if (clear) { | 607 } else if (clear) { |
629 assembler->ClearRegisters(reg, reg); | 608 assembler->ClearRegisters(reg, reg); |
630 } else if (absolute) { | 609 } else if (absolute) { |
631 assembler->SetRegister(reg, value); | 610 assembler->SetRegister(reg, value); |
632 } else if (value != 0) { | 611 } else if (value != 0) { |
633 assembler->AdvanceRegister(reg, value); | 612 assembler->AdvanceRegister(reg, value); |
634 } | 613 } |
635 } | 614 } |
636 } | 615 } |
637 | 616 |
638 | |
639 // This is called as we come into a loop choice node and some other tricky | 617 // This is called as we come into a loop choice node and some other tricky |
640 // nodes. It normalizes the state of the code generator to ensure we can | 618 // nodes. It normalizes the state of the code generator to ensure we can |
641 // generate generic code. | 619 // generate generic code. |
642 void Trace::Flush(RegExpCompiler* compiler, RegExpNode* successor) { | 620 void Trace::Flush(RegExpCompiler* compiler, RegExpNode* successor) { |
643 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 621 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
644 | 622 |
645 ASSERT(!is_trivial()); | 623 ASSERT(!is_trivial()); |
646 | 624 |
647 if (actions_ == NULL && backtrack() == NULL) { | 625 if (actions_ == NULL && backtrack() == NULL) { |
648 // Here we just have some deferred cp advances to fix and we are back to | 626 // Here we just have some deferred cp advances to fix and we are back to |
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
685 RestoreAffectedRegisters(assembler, max_register, registers_to_pop, | 663 RestoreAffectedRegisters(assembler, max_register, registers_to_pop, |
686 registers_to_clear); | 664 registers_to_clear); |
687 if (backtrack() == NULL) { | 665 if (backtrack() == NULL) { |
688 assembler->Backtrack(); | 666 assembler->Backtrack(); |
689 } else { | 667 } else { |
690 assembler->PopCurrentPosition(); | 668 assembler->PopCurrentPosition(); |
691 assembler->GoTo(backtrack()); | 669 assembler->GoTo(backtrack()); |
692 } | 670 } |
693 } | 671 } |
694 | 672 |
695 | |
696 void NegativeSubmatchSuccess::Emit(RegExpCompiler* compiler, Trace* trace) { | 673 void NegativeSubmatchSuccess::Emit(RegExpCompiler* compiler, Trace* trace) { |
697 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 674 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
698 | 675 |
699 // Omit flushing the trace. We discard the entire stack frame anyway. | 676 // Omit flushing the trace. We discard the entire stack frame anyway. |
700 | 677 |
701 if (!label()->IsBound()) { | 678 if (!label()->IsBound()) { |
702 // We are completely independent of the trace, since we ignore it, | 679 // We are completely independent of the trace, since we ignore it, |
703 // so this code can be used as the generic version. | 680 // so this code can be used as the generic version. |
704 assembler->BindBlock(label()); | 681 assembler->BindBlock(label()); |
705 } | 682 } |
706 | 683 |
707 // Throw away everything on the backtrack stack since the start | 684 // Throw away everything on the backtrack stack since the start |
708 // of the negative submatch and restore the character position. | 685 // of the negative submatch and restore the character position. |
709 assembler->ReadCurrentPositionFromRegister(current_position_register_); | 686 assembler->ReadCurrentPositionFromRegister(current_position_register_); |
710 assembler->ReadStackPointerFromRegister(stack_pointer_register_); | 687 assembler->ReadStackPointerFromRegister(stack_pointer_register_); |
711 if (clear_capture_count_ > 0) { | 688 if (clear_capture_count_ > 0) { |
712 // Clear any captures that might have been performed during the success | 689 // Clear any captures that might have been performed during the success |
713 // of the body of the negative look-ahead. | 690 // of the body of the negative look-ahead. |
714 int clear_capture_end = clear_capture_start_ + clear_capture_count_ - 1; | 691 int clear_capture_end = clear_capture_start_ + clear_capture_count_ - 1; |
715 assembler->ClearRegisters(clear_capture_start_, clear_capture_end); | 692 assembler->ClearRegisters(clear_capture_start_, clear_capture_end); |
716 } | 693 } |
717 // Now that we have unwound the stack we find at the top of the stack the | 694 // Now that we have unwound the stack we find at the top of the stack the |
718 // backtrack that the BeginSubmatch node got. | 695 // backtrack that the BeginSubmatch node got. |
719 assembler->Backtrack(); | 696 assembler->Backtrack(); |
720 } | 697 } |
721 | 698 |
722 | |
723 void EndNode::Emit(RegExpCompiler* compiler, Trace* trace) { | 699 void EndNode::Emit(RegExpCompiler* compiler, Trace* trace) { |
724 if (!trace->is_trivial()) { | 700 if (!trace->is_trivial()) { |
725 trace->Flush(compiler, this); | 701 trace->Flush(compiler, this); |
726 return; | 702 return; |
727 } | 703 } |
728 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 704 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
729 if (!label()->IsBound()) { | 705 if (!label()->IsBound()) { |
730 assembler->BindBlock(label()); | 706 assembler->BindBlock(label()); |
731 } | 707 } |
732 switch (action_) { | 708 switch (action_) { |
733 case ACCEPT: | 709 case ACCEPT: |
734 assembler->Succeed(); | 710 assembler->Succeed(); |
735 return; | 711 return; |
736 case BACKTRACK: | 712 case BACKTRACK: |
737 assembler->GoTo(trace->backtrack()); | 713 assembler->GoTo(trace->backtrack()); |
738 return; | 714 return; |
739 case NEGATIVE_SUBMATCH_SUCCESS: | 715 case NEGATIVE_SUBMATCH_SUCCESS: |
740 // This case is handled in a different virtual method. | 716 // This case is handled in a different virtual method. |
741 UNREACHABLE(); | 717 UNREACHABLE(); |
742 } | 718 } |
743 UNIMPLEMENTED(); | 719 UNIMPLEMENTED(); |
744 } | 720 } |
745 | 721 |
746 | |
747 void GuardedAlternative::AddGuard(Guard* guard, Zone* zone) { | 722 void GuardedAlternative::AddGuard(Guard* guard, Zone* zone) { |
748 if (guards_ == NULL) guards_ = new (zone) ZoneGrowableArray<Guard*>(1); | 723 if (guards_ == NULL) guards_ = new (zone) ZoneGrowableArray<Guard*>(1); |
749 guards_->Add(guard); | 724 guards_->Add(guard); |
750 } | 725 } |
751 | 726 |
752 | |
753 ActionNode* ActionNode::SetRegister(intptr_t reg, | 727 ActionNode* ActionNode::SetRegister(intptr_t reg, |
754 intptr_t val, | 728 intptr_t val, |
755 RegExpNode* on_success) { | 729 RegExpNode* on_success) { |
756 ActionNode* result = | 730 ActionNode* result = |
757 new (on_success->zone()) ActionNode(SET_REGISTER, on_success); | 731 new (on_success->zone()) ActionNode(SET_REGISTER, on_success); |
758 result->data_.u_store_register.reg = reg; | 732 result->data_.u_store_register.reg = reg; |
759 result->data_.u_store_register.value = val; | 733 result->data_.u_store_register.value = val; |
760 return result; | 734 return result; |
761 } | 735 } |
762 | 736 |
763 | |
764 ActionNode* ActionNode::IncrementRegister(intptr_t reg, | 737 ActionNode* ActionNode::IncrementRegister(intptr_t reg, |
765 RegExpNode* on_success) { | 738 RegExpNode* on_success) { |
766 ActionNode* result = | 739 ActionNode* result = |
767 new (on_success->zone()) ActionNode(INCREMENT_REGISTER, on_success); | 740 new (on_success->zone()) ActionNode(INCREMENT_REGISTER, on_success); |
768 result->data_.u_increment_register.reg = reg; | 741 result->data_.u_increment_register.reg = reg; |
769 return result; | 742 return result; |
770 } | 743 } |
771 | 744 |
772 | |
773 ActionNode* ActionNode::StorePosition(intptr_t reg, | 745 ActionNode* ActionNode::StorePosition(intptr_t reg, |
774 bool is_capture, | 746 bool is_capture, |
775 RegExpNode* on_success) { | 747 RegExpNode* on_success) { |
776 ActionNode* result = | 748 ActionNode* result = |
777 new (on_success->zone()) ActionNode(STORE_POSITION, on_success); | 749 new (on_success->zone()) ActionNode(STORE_POSITION, on_success); |
778 result->data_.u_position_register.reg = reg; | 750 result->data_.u_position_register.reg = reg; |
779 result->data_.u_position_register.is_capture = is_capture; | 751 result->data_.u_position_register.is_capture = is_capture; |
780 return result; | 752 return result; |
781 } | 753 } |
782 | 754 |
783 | |
784 ActionNode* ActionNode::ClearCaptures(Interval range, RegExpNode* on_success) { | 755 ActionNode* ActionNode::ClearCaptures(Interval range, RegExpNode* on_success) { |
785 ActionNode* result = | 756 ActionNode* result = |
786 new (on_success->zone()) ActionNode(CLEAR_CAPTURES, on_success); | 757 new (on_success->zone()) ActionNode(CLEAR_CAPTURES, on_success); |
787 result->data_.u_clear_captures.range_from = range.from(); | 758 result->data_.u_clear_captures.range_from = range.from(); |
788 result->data_.u_clear_captures.range_to = range.to(); | 759 result->data_.u_clear_captures.range_to = range.to(); |
789 return result; | 760 return result; |
790 } | 761 } |
791 | 762 |
792 | |
793 ActionNode* ActionNode::BeginSubmatch(intptr_t stack_reg, | 763 ActionNode* ActionNode::BeginSubmatch(intptr_t stack_reg, |
794 intptr_t position_reg, | 764 intptr_t position_reg, |
795 RegExpNode* on_success) { | 765 RegExpNode* on_success) { |
796 ActionNode* result = | 766 ActionNode* result = |
797 new (on_success->zone()) ActionNode(BEGIN_SUBMATCH, on_success); | 767 new (on_success->zone()) ActionNode(BEGIN_SUBMATCH, on_success); |
798 result->data_.u_submatch.stack_pointer_register = stack_reg; | 768 result->data_.u_submatch.stack_pointer_register = stack_reg; |
799 result->data_.u_submatch.current_position_register = position_reg; | 769 result->data_.u_submatch.current_position_register = position_reg; |
800 return result; | 770 return result; |
801 } | 771 } |
802 | 772 |
803 | |
804 ActionNode* ActionNode::PositiveSubmatchSuccess(intptr_t stack_reg, | 773 ActionNode* ActionNode::PositiveSubmatchSuccess(intptr_t stack_reg, |
805 intptr_t position_reg, | 774 intptr_t position_reg, |
806 intptr_t clear_register_count, | 775 intptr_t clear_register_count, |
807 intptr_t clear_register_from, | 776 intptr_t clear_register_from, |
808 RegExpNode* on_success) { | 777 RegExpNode* on_success) { |
809 ActionNode* result = new (on_success->zone()) | 778 ActionNode* result = new (on_success->zone()) |
810 ActionNode(POSITIVE_SUBMATCH_SUCCESS, on_success); | 779 ActionNode(POSITIVE_SUBMATCH_SUCCESS, on_success); |
811 result->data_.u_submatch.stack_pointer_register = stack_reg; | 780 result->data_.u_submatch.stack_pointer_register = stack_reg; |
812 result->data_.u_submatch.current_position_register = position_reg; | 781 result->data_.u_submatch.current_position_register = position_reg; |
813 result->data_.u_submatch.clear_register_count = clear_register_count; | 782 result->data_.u_submatch.clear_register_count = clear_register_count; |
814 result->data_.u_submatch.clear_register_from = clear_register_from; | 783 result->data_.u_submatch.clear_register_from = clear_register_from; |
815 return result; | 784 return result; |
816 } | 785 } |
817 | 786 |
818 | |
819 ActionNode* ActionNode::EmptyMatchCheck(intptr_t start_register, | 787 ActionNode* ActionNode::EmptyMatchCheck(intptr_t start_register, |
820 intptr_t repetition_register, | 788 intptr_t repetition_register, |
821 intptr_t repetition_limit, | 789 intptr_t repetition_limit, |
822 RegExpNode* on_success) { | 790 RegExpNode* on_success) { |
823 ActionNode* result = | 791 ActionNode* result = |
824 new (on_success->zone()) ActionNode(EMPTY_MATCH_CHECK, on_success); | 792 new (on_success->zone()) ActionNode(EMPTY_MATCH_CHECK, on_success); |
825 result->data_.u_empty_match_check.start_register = start_register; | 793 result->data_.u_empty_match_check.start_register = start_register; |
826 result->data_.u_empty_match_check.repetition_register = repetition_register; | 794 result->data_.u_empty_match_check.repetition_register = repetition_register; |
827 result->data_.u_empty_match_check.repetition_limit = repetition_limit; | 795 result->data_.u_empty_match_check.repetition_limit = repetition_limit; |
828 return result; | 796 return result; |
829 } | 797 } |
830 | 798 |
831 | |
832 #define DEFINE_ACCEPT(Type) \ | 799 #define DEFINE_ACCEPT(Type) \ |
833 void Type##Node::Accept(NodeVisitor* visitor) { visitor->Visit##Type(this); } | 800 void Type##Node::Accept(NodeVisitor* visitor) { visitor->Visit##Type(this); } |
834 FOR_EACH_NODE_TYPE(DEFINE_ACCEPT) | 801 FOR_EACH_NODE_TYPE(DEFINE_ACCEPT) |
835 #undef DEFINE_ACCEPT | 802 #undef DEFINE_ACCEPT |
836 | 803 |
837 | |
838 void LoopChoiceNode::Accept(NodeVisitor* visitor) { | 804 void LoopChoiceNode::Accept(NodeVisitor* visitor) { |
839 visitor->VisitLoopChoice(this); | 805 visitor->VisitLoopChoice(this); |
840 } | 806 } |
841 | 807 |
842 | |
843 // ------------------------------------------------------------------- | 808 // ------------------------------------------------------------------- |
844 // Emit code. | 809 // Emit code. |
845 | 810 |
846 | |
847 void ChoiceNode::GenerateGuard(RegExpMacroAssembler* macro_assembler, | 811 void ChoiceNode::GenerateGuard(RegExpMacroAssembler* macro_assembler, |
848 Guard* guard, | 812 Guard* guard, |
849 Trace* trace) { | 813 Trace* trace) { |
850 switch (guard->op()) { | 814 switch (guard->op()) { |
851 case Guard::LT: | 815 case Guard::LT: |
852 ASSERT(!trace->mentions_reg(guard->reg())); | 816 ASSERT(!trace->mentions_reg(guard->reg())); |
853 macro_assembler->IfRegisterGE(guard->reg(), guard->value(), | 817 macro_assembler->IfRegisterGE(guard->reg(), guard->value(), |
854 trace->backtrack()); | 818 trace->backtrack()); |
855 break; | 819 break; |
856 case Guard::GEQ: | 820 case Guard::GEQ: |
857 ASSERT(!trace->mentions_reg(guard->reg())); | 821 ASSERT(!trace->mentions_reg(guard->reg())); |
858 macro_assembler->IfRegisterLT(guard->reg(), guard->value(), | 822 macro_assembler->IfRegisterLT(guard->reg(), guard->value(), |
859 trace->backtrack()); | 823 trace->backtrack()); |
860 break; | 824 break; |
861 } | 825 } |
862 } | 826 } |
863 | 827 |
864 | |
865 // Returns the number of characters in the equivalence class, omitting those | 828 // Returns the number of characters in the equivalence class, omitting those |
866 // that cannot occur in the source string because it is ASCII. | 829 // that cannot occur in the source string because it is ASCII. |
867 static intptr_t GetCaseIndependentLetters(uint16_t character, | 830 static intptr_t GetCaseIndependentLetters(uint16_t character, |
868 bool one_byte_subject, | 831 bool one_byte_subject, |
869 int32_t* letters) { | 832 int32_t* letters) { |
870 unibrow::Mapping<unibrow::Ecma262UnCanonicalize> jsregexp_uncanonicalize; | 833 unibrow::Mapping<unibrow::Ecma262UnCanonicalize> jsregexp_uncanonicalize; |
871 intptr_t length = jsregexp_uncanonicalize.get(character, '\0', letters); | 834 intptr_t length = jsregexp_uncanonicalize.get(character, '\0', letters); |
872 // Unibrow returns 0 or 1 for characters where case independence is | 835 // Unibrow returns 0 or 1 for characters where case independence is |
873 // trivial. | 836 // trivial. |
874 if (length == 0) { | 837 if (length == 0) { |
875 letters[0] = character; | 838 letters[0] = character; |
876 length = 1; | 839 length = 1; |
877 } | 840 } |
878 if (!one_byte_subject || character <= Symbols::kMaxOneCharCodeSymbol) { | 841 if (!one_byte_subject || character <= Symbols::kMaxOneCharCodeSymbol) { |
879 return length; | 842 return length; |
880 } | 843 } |
881 | 844 |
882 // The standard requires that non-ASCII characters cannot have ASCII | 845 // The standard requires that non-ASCII characters cannot have ASCII |
883 // character codes in their equivalence class. | 846 // character codes in their equivalence class. |
884 // TODO(dcarney): issue 3550 this is not actually true for Latin1 anymore, | 847 // TODO(dcarney): issue 3550 this is not actually true for Latin1 anymore, |
885 // is it? For example, \u00C5 is equivalent to \u212B. | 848 // is it? For example, \u00C5 is equivalent to \u212B. |
886 return 0; | 849 return 0; |
887 } | 850 } |
888 | 851 |
889 | |
890 static inline bool EmitSimpleCharacter(Zone* zone, | 852 static inline bool EmitSimpleCharacter(Zone* zone, |
891 RegExpCompiler* compiler, | 853 RegExpCompiler* compiler, |
892 uint16_t c, | 854 uint16_t c, |
893 BlockLabel* on_failure, | 855 BlockLabel* on_failure, |
894 intptr_t cp_offset, | 856 intptr_t cp_offset, |
895 bool check, | 857 bool check, |
896 bool preloaded) { | 858 bool preloaded) { |
897 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 859 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
898 bool bound_checked = false; | 860 bool bound_checked = false; |
899 if (!preloaded) { | 861 if (!preloaded) { |
900 assembler->LoadCurrentCharacter(cp_offset, on_failure, check); | 862 assembler->LoadCurrentCharacter(cp_offset, on_failure, check); |
901 bound_checked = true; | 863 bound_checked = true; |
902 } | 864 } |
903 assembler->CheckNotCharacter(c, on_failure); | 865 assembler->CheckNotCharacter(c, on_failure); |
904 return bound_checked; | 866 return bound_checked; |
905 } | 867 } |
906 | 868 |
907 | |
908 // Only emits non-letters (things that don't have case). Only used for case | 869 // Only emits non-letters (things that don't have case). Only used for case |
909 // independent matches. | 870 // independent matches. |
910 static inline bool EmitAtomNonLetter(Zone* zone, | 871 static inline bool EmitAtomNonLetter(Zone* zone, |
911 RegExpCompiler* compiler, | 872 RegExpCompiler* compiler, |
912 uint16_t c, | 873 uint16_t c, |
913 BlockLabel* on_failure, | 874 BlockLabel* on_failure, |
914 intptr_t cp_offset, | 875 intptr_t cp_offset, |
915 bool check, | 876 bool check, |
916 bool preloaded) { | 877 bool preloaded) { |
917 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); | 878 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); |
(...skipping 15 matching lines...) Expand all Loading... |
933 } | 894 } |
934 if (!preloaded) { | 895 if (!preloaded) { |
935 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check); | 896 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check); |
936 checked = check; | 897 checked = check; |
937 } | 898 } |
938 macro_assembler->CheckNotCharacter(c, on_failure); | 899 macro_assembler->CheckNotCharacter(c, on_failure); |
939 } | 900 } |
940 return checked; | 901 return checked; |
941 } | 902 } |
942 | 903 |
943 | |
944 static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler, | 904 static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler, |
945 bool one_byte, | 905 bool one_byte, |
946 uint16_t c1, | 906 uint16_t c1, |
947 uint16_t c2, | 907 uint16_t c2, |
948 BlockLabel* on_failure) { | 908 BlockLabel* on_failure) { |
949 uint16_t char_mask; | 909 uint16_t char_mask; |
950 if (one_byte) { | 910 if (one_byte) { |
951 char_mask = Symbols::kMaxOneCharCodeSymbol; | 911 char_mask = Symbols::kMaxOneCharCodeSymbol; |
952 } else { | 912 } else { |
953 char_mask = Utf16::kMaxCodeUnit; | 913 char_mask = Utf16::kMaxCodeUnit; |
(...skipping 16 matching lines...) Expand all Loading... |
970 // trick. We avoid the theoretical case where negative numbers are | 930 // trick. We avoid the theoretical case where negative numbers are |
971 // involved in order to simplify code generation. | 931 // involved in order to simplify code generation. |
972 uint16_t mask = char_mask ^ diff; | 932 uint16_t mask = char_mask ^ diff; |
973 macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff, diff, mask, | 933 macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff, diff, mask, |
974 on_failure); | 934 on_failure); |
975 return true; | 935 return true; |
976 } | 936 } |
977 return false; | 937 return false; |
978 } | 938 } |
979 | 939 |
980 | |
981 typedef bool EmitCharacterFunction(Zone* zone, | 940 typedef bool EmitCharacterFunction(Zone* zone, |
982 RegExpCompiler* compiler, | 941 RegExpCompiler* compiler, |
983 uint16_t c, | 942 uint16_t c, |
984 BlockLabel* on_failure, | 943 BlockLabel* on_failure, |
985 intptr_t cp_offset, | 944 intptr_t cp_offset, |
986 bool check, | 945 bool check, |
987 bool preloaded); | 946 bool preloaded); |
988 | 947 |
989 // Only emits letters (things that have case). Only used for case independent | 948 // Only emits letters (things that have case). Only used for case independent |
990 // matches. | 949 // matches. |
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1027 macro_assembler->CheckNotCharacter(chars[2], on_failure); | 986 macro_assembler->CheckNotCharacter(chars[2], on_failure); |
1028 macro_assembler->BindBlock(&ok); | 987 macro_assembler->BindBlock(&ok); |
1029 break; | 988 break; |
1030 default: | 989 default: |
1031 UNREACHABLE(); | 990 UNREACHABLE(); |
1032 break; | 991 break; |
1033 } | 992 } |
1034 return true; | 993 return true; |
1035 } | 994 } |
1036 | 995 |
1037 | |
1038 static void EmitBoundaryTest(RegExpMacroAssembler* masm, | 996 static void EmitBoundaryTest(RegExpMacroAssembler* masm, |
1039 intptr_t border, | 997 intptr_t border, |
1040 BlockLabel* fall_through, | 998 BlockLabel* fall_through, |
1041 BlockLabel* above_or_equal, | 999 BlockLabel* above_or_equal, |
1042 BlockLabel* below) { | 1000 BlockLabel* below) { |
1043 if (below != fall_through) { | 1001 if (below != fall_through) { |
1044 masm->CheckCharacterLT(border, below); | 1002 masm->CheckCharacterLT(border, below); |
1045 if (above_or_equal != fall_through) masm->GoTo(above_or_equal); | 1003 if (above_or_equal != fall_through) masm->GoTo(above_or_equal); |
1046 } else { | 1004 } else { |
1047 masm->CheckCharacterGT(border - 1, above_or_equal); | 1005 masm->CheckCharacterGT(border - 1, above_or_equal); |
1048 } | 1006 } |
1049 } | 1007 } |
1050 | 1008 |
1051 | |
1052 static void EmitDoubleBoundaryTest(RegExpMacroAssembler* masm, | 1009 static void EmitDoubleBoundaryTest(RegExpMacroAssembler* masm, |
1053 intptr_t first, | 1010 intptr_t first, |
1054 intptr_t last, | 1011 intptr_t last, |
1055 BlockLabel* fall_through, | 1012 BlockLabel* fall_through, |
1056 BlockLabel* in_range, | 1013 BlockLabel* in_range, |
1057 BlockLabel* out_of_range) { | 1014 BlockLabel* out_of_range) { |
1058 if (in_range == fall_through) { | 1015 if (in_range == fall_through) { |
1059 if (first == last) { | 1016 if (first == last) { |
1060 masm->CheckNotCharacter(first, out_of_range); | 1017 masm->CheckNotCharacter(first, out_of_range); |
1061 } else { | 1018 } else { |
1062 masm->CheckCharacterNotInRange(first, last, out_of_range); | 1019 masm->CheckCharacterNotInRange(first, last, out_of_range); |
1063 } | 1020 } |
1064 } else { | 1021 } else { |
1065 if (first == last) { | 1022 if (first == last) { |
1066 masm->CheckCharacter(first, in_range); | 1023 masm->CheckCharacter(first, in_range); |
1067 } else { | 1024 } else { |
1068 masm->CheckCharacterInRange(first, last, in_range); | 1025 masm->CheckCharacterInRange(first, last, in_range); |
1069 } | 1026 } |
1070 if (out_of_range != fall_through) masm->GoTo(out_of_range); | 1027 if (out_of_range != fall_through) masm->GoTo(out_of_range); |
1071 } | 1028 } |
1072 } | 1029 } |
1073 | 1030 |
1074 | |
1075 // even_label is for ranges[i] to ranges[i + 1] where i - start_index is even. | 1031 // even_label is for ranges[i] to ranges[i + 1] where i - start_index is even. |
1076 // odd_label is for ranges[i] to ranges[i + 1] where i - start_index is odd. | 1032 // odd_label is for ranges[i] to ranges[i + 1] where i - start_index is odd. |
1077 static void EmitUseLookupTable(RegExpMacroAssembler* masm, | 1033 static void EmitUseLookupTable(RegExpMacroAssembler* masm, |
1078 ZoneGrowableArray<int>* ranges, | 1034 ZoneGrowableArray<int>* ranges, |
1079 intptr_t start_index, | 1035 intptr_t start_index, |
1080 intptr_t end_index, | 1036 intptr_t end_index, |
1081 intptr_t min_char, | 1037 intptr_t min_char, |
1082 BlockLabel* fall_through, | 1038 BlockLabel* fall_through, |
1083 BlockLabel* even_label, | 1039 BlockLabel* even_label, |
1084 BlockLabel* odd_label) { | 1040 BlockLabel* odd_label) { |
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1124 // TODO(erikcorry): Cache these. | 1080 // TODO(erikcorry): Cache these. |
1125 const TypedData& ba = TypedData::ZoneHandle( | 1081 const TypedData& ba = TypedData::ZoneHandle( |
1126 masm->zone(), TypedData::New(kTypedDataUint8ArrayCid, kSize, Heap::kOld)); | 1082 masm->zone(), TypedData::New(kTypedDataUint8ArrayCid, kSize, Heap::kOld)); |
1127 for (intptr_t i = 0; i < kSize; i++) { | 1083 for (intptr_t i = 0; i < kSize; i++) { |
1128 ba.SetUint8(i, templ[i]); | 1084 ba.SetUint8(i, templ[i]); |
1129 } | 1085 } |
1130 masm->CheckBitInTable(ba, on_bit_set); | 1086 masm->CheckBitInTable(ba, on_bit_set); |
1131 if (on_bit_clear != fall_through) masm->GoTo(on_bit_clear); | 1087 if (on_bit_clear != fall_through) masm->GoTo(on_bit_clear); |
1132 } | 1088 } |
1133 | 1089 |
1134 | |
1135 static void CutOutRange(RegExpMacroAssembler* masm, | 1090 static void CutOutRange(RegExpMacroAssembler* masm, |
1136 ZoneGrowableArray<int>* ranges, | 1091 ZoneGrowableArray<int>* ranges, |
1137 intptr_t start_index, | 1092 intptr_t start_index, |
1138 intptr_t end_index, | 1093 intptr_t end_index, |
1139 intptr_t cut_index, | 1094 intptr_t cut_index, |
1140 BlockLabel* even_label, | 1095 BlockLabel* even_label, |
1141 BlockLabel* odd_label) { | 1096 BlockLabel* odd_label) { |
1142 bool odd = (((cut_index - start_index) & 1) == 1); | 1097 bool odd = (((cut_index - start_index) & 1) == 1); |
1143 BlockLabel* in_range_label = odd ? odd_label : even_label; | 1098 BlockLabel* in_range_label = odd ? odd_label : even_label; |
1144 BlockLabel dummy; | 1099 BlockLabel dummy; |
1145 EmitDoubleBoundaryTest(masm, ranges->At(cut_index), | 1100 EmitDoubleBoundaryTest(masm, ranges->At(cut_index), |
1146 ranges->At(cut_index + 1) - 1, &dummy, in_range_label, | 1101 ranges->At(cut_index + 1) - 1, &dummy, in_range_label, |
1147 &dummy); | 1102 &dummy); |
1148 ASSERT(!dummy.IsLinked()); | 1103 ASSERT(!dummy.IsLinked()); |
1149 // Cut out the single range by rewriting the array. This creates a new | 1104 // Cut out the single range by rewriting the array. This creates a new |
1150 // range that is a merger of the two ranges on either side of the one we | 1105 // range that is a merger of the two ranges on either side of the one we |
1151 // are cutting out. The oddity of the labels is preserved. | 1106 // are cutting out. The oddity of the labels is preserved. |
1152 for (intptr_t j = cut_index; j > start_index; j--) { | 1107 for (intptr_t j = cut_index; j > start_index; j--) { |
1153 (*ranges)[j] = ranges->At(j - 1); | 1108 (*ranges)[j] = ranges->At(j - 1); |
1154 } | 1109 } |
1155 for (intptr_t j = cut_index + 1; j < end_index; j++) { | 1110 for (intptr_t j = cut_index + 1; j < end_index; j++) { |
1156 (*ranges)[j] = ranges->At(j + 1); | 1111 (*ranges)[j] = ranges->At(j + 1); |
1157 } | 1112 } |
1158 } | 1113 } |
1159 | 1114 |
1160 | |
1161 // Unicode case. Split the search space into kSize spaces that are handled | 1115 // Unicode case. Split the search space into kSize spaces that are handled |
1162 // with recursion. | 1116 // with recursion. |
1163 static void SplitSearchSpace(ZoneGrowableArray<int>* ranges, | 1117 static void SplitSearchSpace(ZoneGrowableArray<int>* ranges, |
1164 intptr_t start_index, | 1118 intptr_t start_index, |
1165 intptr_t end_index, | 1119 intptr_t end_index, |
1166 intptr_t* new_start_index, | 1120 intptr_t* new_start_index, |
1167 intptr_t* new_end_index, | 1121 intptr_t* new_end_index, |
1168 intptr_t* border) { | 1122 intptr_t* border) { |
1169 static const intptr_t kSize = RegExpMacroAssembler::kTableSize; | 1123 static const intptr_t kSize = RegExpMacroAssembler::kTableSize; |
1170 static const intptr_t kMask = RegExpMacroAssembler::kTableMask; | 1124 static const intptr_t kMask = RegExpMacroAssembler::kTableMask; |
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1216 if (ranges->At(*new_end_index) == *border) { | 1170 if (ranges->At(*new_end_index) == *border) { |
1217 (*new_end_index)--; | 1171 (*new_end_index)--; |
1218 } | 1172 } |
1219 if (*border >= ranges->At(end_index)) { | 1173 if (*border >= ranges->At(end_index)) { |
1220 *border = ranges->At(end_index); | 1174 *border = ranges->At(end_index); |
1221 *new_start_index = end_index; // Won't be used. | 1175 *new_start_index = end_index; // Won't be used. |
1222 *new_end_index = end_index - 1; | 1176 *new_end_index = end_index - 1; |
1223 } | 1177 } |
1224 } | 1178 } |
1225 | 1179 |
1226 | |
1227 // Gets a series of segment boundaries representing a character class. If the | 1180 // Gets a series of segment boundaries representing a character class. If the |
1228 // character is in the range between an even and an odd boundary (counting from | 1181 // character is in the range between an even and an odd boundary (counting from |
1229 // start_index) then go to even_label, otherwise go to odd_label. We already | 1182 // start_index) then go to even_label, otherwise go to odd_label. We already |
1230 // know that the character is in the range of min_char to max_char inclusive. | 1183 // know that the character is in the range of min_char to max_char inclusive. |
1231 // Either label can be NULL indicating backtracking. Either label can also be | 1184 // Either label can be NULL indicating backtracking. Either label can also be |
1232 // equal to the fall_through label. | 1185 // equal to the fall_through label. |
1233 static void GenerateBranches(RegExpMacroAssembler* masm, | 1186 static void GenerateBranches(RegExpMacroAssembler* masm, |
1234 ZoneGrowableArray<int>* ranges, | 1187 ZoneGrowableArray<int>* ranges, |
1235 intptr_t start_index, | 1188 intptr_t start_index, |
1236 intptr_t end_index, | 1189 intptr_t end_index, |
(...skipping 100 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1337 | 1290 |
1338 if (handle_rest.IsLinked()) { | 1291 if (handle_rest.IsLinked()) { |
1339 masm->BindBlock(&handle_rest); | 1292 masm->BindBlock(&handle_rest); |
1340 bool flip = (new_start_index & 1) != (start_index & 1); | 1293 bool flip = (new_start_index & 1) != (start_index & 1); |
1341 GenerateBranches(masm, ranges, new_start_index, end_index, border, max_char, | 1294 GenerateBranches(masm, ranges, new_start_index, end_index, border, max_char, |
1342 &dummy, flip ? odd_label : even_label, | 1295 &dummy, flip ? odd_label : even_label, |
1343 flip ? even_label : odd_label); | 1296 flip ? even_label : odd_label); |
1344 } | 1297 } |
1345 } | 1298 } |
1346 | 1299 |
1347 | |
1348 static void EmitCharClass(RegExpMacroAssembler* macro_assembler, | 1300 static void EmitCharClass(RegExpMacroAssembler* macro_assembler, |
1349 RegExpCharacterClass* cc, | 1301 RegExpCharacterClass* cc, |
1350 bool one_byte, | 1302 bool one_byte, |
1351 BlockLabel* on_failure, | 1303 BlockLabel* on_failure, |
1352 intptr_t cp_offset, | 1304 intptr_t cp_offset, |
1353 bool check_offset, | 1305 bool check_offset, |
1354 bool preloaded, | 1306 bool preloaded, |
1355 Zone* zone) { | 1307 Zone* zone) { |
1356 ZoneGrowableArray<CharacterRange>* ranges = cc->ranges(); | 1308 ZoneGrowableArray<CharacterRange>* ranges = cc->ranges(); |
1357 if (!CharacterRange::IsCanonical(ranges)) { | 1309 if (!CharacterRange::IsCanonical(ranges)) { |
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1403 if (check_offset) { | 1355 if (check_offset) { |
1404 macro_assembler->CheckPosition(cp_offset, on_failure); | 1356 macro_assembler->CheckPosition(cp_offset, on_failure); |
1405 } | 1357 } |
1406 return; | 1358 return; |
1407 } | 1359 } |
1408 | 1360 |
1409 if (!preloaded) { | 1361 if (!preloaded) { |
1410 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check_offset); | 1362 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check_offset); |
1411 } | 1363 } |
1412 | 1364 |
1413 if (cc->is_standard() && | 1365 if (cc->is_standard() && macro_assembler->CheckSpecialCharacterClass( |
1414 macro_assembler->CheckSpecialCharacterClass(cc->standard_type(), | 1366 cc->standard_type(), on_failure)) { |
1415 on_failure)) { | |
1416 return; | 1367 return; |
1417 } | 1368 } |
1418 | 1369 |
1419 | |
1420 // A new list with ascending entries. Each entry is a code unit | 1370 // A new list with ascending entries. Each entry is a code unit |
1421 // where there is a boundary between code units that are part of | 1371 // where there is a boundary between code units that are part of |
1422 // the class and code units that are not. Normally we insert an | 1372 // the class and code units that are not. Normally we insert an |
1423 // entry at zero which goes to the failure label, but if there | 1373 // entry at zero which goes to the failure label, but if there |
1424 // was already one there we fall through for success on that entry. | 1374 // was already one there we fall through for success on that entry. |
1425 // Subsequent entries have alternating meaning (success/failure). | 1375 // Subsequent entries have alternating meaning (success/failure). |
1426 ZoneGrowableArray<int>* range_boundaries = | 1376 ZoneGrowableArray<int>* range_boundaries = |
1427 new (zone) ZoneGrowableArray<int>(last_valid_range); | 1377 new (zone) ZoneGrowableArray<int>(last_valid_range); |
1428 | 1378 |
1429 bool zeroth_entry_is_failure = !cc->is_negated(); | 1379 bool zeroth_entry_is_failure = !cc->is_negated(); |
(...skipping 17 matching lines...) Expand all Loading... |
1447 GenerateBranches(macro_assembler, range_boundaries, | 1397 GenerateBranches(macro_assembler, range_boundaries, |
1448 0, // start_index. | 1398 0, // start_index. |
1449 end_index, | 1399 end_index, |
1450 0, // min_char. | 1400 0, // min_char. |
1451 max_char, &fall_through, | 1401 max_char, &fall_through, |
1452 zeroth_entry_is_failure ? &fall_through : on_failure, | 1402 zeroth_entry_is_failure ? &fall_through : on_failure, |
1453 zeroth_entry_is_failure ? on_failure : &fall_through); | 1403 zeroth_entry_is_failure ? on_failure : &fall_through); |
1454 macro_assembler->BindBlock(&fall_through); | 1404 macro_assembler->BindBlock(&fall_through); |
1455 } | 1405 } |
1456 | 1406 |
1457 | |
1458 RegExpNode::~RegExpNode() {} | 1407 RegExpNode::~RegExpNode() {} |
1459 | 1408 |
1460 | |
1461 RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler, | 1409 RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler, |
1462 Trace* trace) { | 1410 Trace* trace) { |
1463 // If we are generating a greedy loop then don't stop and don't reuse code. | 1411 // If we are generating a greedy loop then don't stop and don't reuse code. |
1464 if (trace->stop_node() != NULL) { | 1412 if (trace->stop_node() != NULL) { |
1465 return CONTINUE; | 1413 return CONTINUE; |
1466 } | 1414 } |
1467 | 1415 |
1468 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); | 1416 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); |
1469 if (trace->is_trivial()) { | 1417 if (trace->is_trivial()) { |
1470 if (label_.IsBound()) { | 1418 if (label_.IsBound()) { |
(...skipping 22 matching lines...) Expand all Loading... |
1493 return CONTINUE; | 1441 return CONTINUE; |
1494 } | 1442 } |
1495 | 1443 |
1496 // If we get here code has been generated for this node too many times or | 1444 // If we get here code has been generated for this node too many times or |
1497 // recursion is too deep. Time to switch to a generic version. The code for | 1445 // recursion is too deep. Time to switch to a generic version. The code for |
1498 // generic versions above can handle deep recursion properly. | 1446 // generic versions above can handle deep recursion properly. |
1499 trace->Flush(compiler, this); | 1447 trace->Flush(compiler, this); |
1500 return DONE; | 1448 return DONE; |
1501 } | 1449 } |
1502 | 1450 |
1503 | |
1504 intptr_t ActionNode::EatsAtLeast(intptr_t still_to_find, | 1451 intptr_t ActionNode::EatsAtLeast(intptr_t still_to_find, |
1505 intptr_t budget, | 1452 intptr_t budget, |
1506 bool not_at_start) { | 1453 bool not_at_start) { |
1507 if (budget <= 0) return 0; | 1454 if (budget <= 0) return 0; |
1508 if (action_type_ == POSITIVE_SUBMATCH_SUCCESS) return 0; // Rewinds input! | 1455 if (action_type_ == POSITIVE_SUBMATCH_SUCCESS) return 0; // Rewinds input! |
1509 return on_success()->EatsAtLeast(still_to_find, budget - 1, not_at_start); | 1456 return on_success()->EatsAtLeast(still_to_find, budget - 1, not_at_start); |
1510 } | 1457 } |
1511 | 1458 |
1512 | |
1513 void ActionNode::FillInBMInfo(intptr_t offset, | 1459 void ActionNode::FillInBMInfo(intptr_t offset, |
1514 intptr_t budget, | 1460 intptr_t budget, |
1515 BoyerMooreLookahead* bm, | 1461 BoyerMooreLookahead* bm, |
1516 bool not_at_start) { | 1462 bool not_at_start) { |
1517 if (action_type_ == BEGIN_SUBMATCH) { | 1463 if (action_type_ == BEGIN_SUBMATCH) { |
1518 bm->SetRest(offset); | 1464 bm->SetRest(offset); |
1519 } else if (action_type_ != POSITIVE_SUBMATCH_SUCCESS) { | 1465 } else if (action_type_ != POSITIVE_SUBMATCH_SUCCESS) { |
1520 on_success()->FillInBMInfo(offset, budget - 1, bm, not_at_start); | 1466 on_success()->FillInBMInfo(offset, budget - 1, bm, not_at_start); |
1521 } | 1467 } |
1522 SaveBMInfo(bm, not_at_start, offset); | 1468 SaveBMInfo(bm, not_at_start, offset); |
1523 } | 1469 } |
1524 | 1470 |
1525 | |
1526 intptr_t AssertionNode::EatsAtLeast(intptr_t still_to_find, | 1471 intptr_t AssertionNode::EatsAtLeast(intptr_t still_to_find, |
1527 intptr_t budget, | 1472 intptr_t budget, |
1528 bool not_at_start) { | 1473 bool not_at_start) { |
1529 if (budget <= 0) return 0; | 1474 if (budget <= 0) return 0; |
1530 // If we know we are not at the start and we are asked "how many characters | 1475 // If we know we are not at the start and we are asked "how many characters |
1531 // will you match if you succeed?" then we can answer anything since false | 1476 // will you match if you succeed?" then we can answer anything since false |
1532 // implies false. So lets just return the max answer (still_to_find) since | 1477 // implies false. So lets just return the max answer (still_to_find) since |
1533 // that won't prevent us from preloading a lot of characters for the other | 1478 // that won't prevent us from preloading a lot of characters for the other |
1534 // branches in the node graph. | 1479 // branches in the node graph. |
1535 if (assertion_type() == AT_START && not_at_start) return still_to_find; | 1480 if (assertion_type() == AT_START && not_at_start) return still_to_find; |
1536 return on_success()->EatsAtLeast(still_to_find, budget - 1, not_at_start); | 1481 return on_success()->EatsAtLeast(still_to_find, budget - 1, not_at_start); |
1537 } | 1482 } |
1538 | 1483 |
1539 | |
1540 void AssertionNode::FillInBMInfo(intptr_t offset, | 1484 void AssertionNode::FillInBMInfo(intptr_t offset, |
1541 intptr_t budget, | 1485 intptr_t budget, |
1542 BoyerMooreLookahead* bm, | 1486 BoyerMooreLookahead* bm, |
1543 bool not_at_start) { | 1487 bool not_at_start) { |
1544 // Match the behaviour of EatsAtLeast on this node. | 1488 // Match the behaviour of EatsAtLeast on this node. |
1545 if (assertion_type() == AT_START && not_at_start) return; | 1489 if (assertion_type() == AT_START && not_at_start) return; |
1546 on_success()->FillInBMInfo(offset, budget - 1, bm, not_at_start); | 1490 on_success()->FillInBMInfo(offset, budget - 1, bm, not_at_start); |
1547 SaveBMInfo(bm, not_at_start, offset); | 1491 SaveBMInfo(bm, not_at_start, offset); |
1548 } | 1492 } |
1549 | 1493 |
1550 | |
1551 intptr_t BackReferenceNode::EatsAtLeast(intptr_t still_to_find, | 1494 intptr_t BackReferenceNode::EatsAtLeast(intptr_t still_to_find, |
1552 intptr_t budget, | 1495 intptr_t budget, |
1553 bool not_at_start) { | 1496 bool not_at_start) { |
1554 if (budget <= 0) return 0; | 1497 if (budget <= 0) return 0; |
1555 return on_success()->EatsAtLeast(still_to_find, budget - 1, not_at_start); | 1498 return on_success()->EatsAtLeast(still_to_find, budget - 1, not_at_start); |
1556 } | 1499 } |
1557 | 1500 |
1558 | |
1559 intptr_t TextNode::EatsAtLeast(intptr_t still_to_find, | 1501 intptr_t TextNode::EatsAtLeast(intptr_t still_to_find, |
1560 intptr_t budget, | 1502 intptr_t budget, |
1561 bool not_at_start) { | 1503 bool not_at_start) { |
1562 intptr_t answer = Length(); | 1504 intptr_t answer = Length(); |
1563 if (answer >= still_to_find) return answer; | 1505 if (answer >= still_to_find) return answer; |
1564 if (budget <= 0) return answer; | 1506 if (budget <= 0) return answer; |
1565 // We are not at start after this node so we set the last argument to 'true'. | 1507 // We are not at start after this node so we set the last argument to 'true'. |
1566 return answer + | 1508 return answer + |
1567 on_success()->EatsAtLeast(still_to_find - answer, budget - 1, true); | 1509 on_success()->EatsAtLeast(still_to_find - answer, budget - 1, true); |
1568 } | 1510 } |
1569 | 1511 |
1570 | |
1571 intptr_t NegativeLookaheadChoiceNode::EatsAtLeast(intptr_t still_to_find, | 1512 intptr_t NegativeLookaheadChoiceNode::EatsAtLeast(intptr_t still_to_find, |
1572 intptr_t budget, | 1513 intptr_t budget, |
1573 bool not_at_start) { | 1514 bool not_at_start) { |
1574 if (budget <= 0) return 0; | 1515 if (budget <= 0) return 0; |
1575 // Alternative 0 is the negative lookahead, alternative 1 is what comes | 1516 // Alternative 0 is the negative lookahead, alternative 1 is what comes |
1576 // afterwards. | 1517 // afterwards. |
1577 RegExpNode* node = (*alternatives_)[1].node(); | 1518 RegExpNode* node = (*alternatives_)[1].node(); |
1578 return node->EatsAtLeast(still_to_find, budget - 1, not_at_start); | 1519 return node->EatsAtLeast(still_to_find, budget - 1, not_at_start); |
1579 } | 1520 } |
1580 | 1521 |
1581 | |
1582 void NegativeLookaheadChoiceNode::GetQuickCheckDetails( | 1522 void NegativeLookaheadChoiceNode::GetQuickCheckDetails( |
1583 QuickCheckDetails* details, | 1523 QuickCheckDetails* details, |
1584 RegExpCompiler* compiler, | 1524 RegExpCompiler* compiler, |
1585 intptr_t filled_in, | 1525 intptr_t filled_in, |
1586 bool not_at_start) { | 1526 bool not_at_start) { |
1587 // Alternative 0 is the negative lookahead, alternative 1 is what comes | 1527 // Alternative 0 is the negative lookahead, alternative 1 is what comes |
1588 // afterwards. | 1528 // afterwards. |
1589 RegExpNode* node = (*alternatives_)[1].node(); | 1529 RegExpNode* node = (*alternatives_)[1].node(); |
1590 return node->GetQuickCheckDetails(details, compiler, filled_in, not_at_start); | 1530 return node->GetQuickCheckDetails(details, compiler, filled_in, not_at_start); |
1591 } | 1531 } |
1592 | 1532 |
1593 | |
1594 intptr_t ChoiceNode::EatsAtLeastHelper(intptr_t still_to_find, | 1533 intptr_t ChoiceNode::EatsAtLeastHelper(intptr_t still_to_find, |
1595 intptr_t budget, | 1534 intptr_t budget, |
1596 RegExpNode* ignore_this_node, | 1535 RegExpNode* ignore_this_node, |
1597 bool not_at_start) { | 1536 bool not_at_start) { |
1598 if (budget <= 0) return 0; | 1537 if (budget <= 0) return 0; |
1599 intptr_t min = 100; | 1538 intptr_t min = 100; |
1600 intptr_t choice_count = alternatives_->length(); | 1539 intptr_t choice_count = alternatives_->length(); |
1601 budget = (budget - 1) / choice_count; | 1540 budget = (budget - 1) / choice_count; |
1602 for (intptr_t i = 0; i < choice_count; i++) { | 1541 for (intptr_t i = 0; i < choice_count; i++) { |
1603 RegExpNode* node = (*alternatives_)[i].node(); | 1542 RegExpNode* node = (*alternatives_)[i].node(); |
1604 if (node == ignore_this_node) continue; | 1543 if (node == ignore_this_node) continue; |
1605 intptr_t node_eats_at_least = | 1544 intptr_t node_eats_at_least = |
1606 node->EatsAtLeast(still_to_find, budget, not_at_start); | 1545 node->EatsAtLeast(still_to_find, budget, not_at_start); |
1607 if (node_eats_at_least < min) min = node_eats_at_least; | 1546 if (node_eats_at_least < min) min = node_eats_at_least; |
1608 if (min == 0) return 0; | 1547 if (min == 0) return 0; |
1609 } | 1548 } |
1610 return min; | 1549 return min; |
1611 } | 1550 } |
1612 | 1551 |
1613 | |
1614 intptr_t LoopChoiceNode::EatsAtLeast(intptr_t still_to_find, | 1552 intptr_t LoopChoiceNode::EatsAtLeast(intptr_t still_to_find, |
1615 intptr_t budget, | 1553 intptr_t budget, |
1616 bool not_at_start) { | 1554 bool not_at_start) { |
1617 return EatsAtLeastHelper(still_to_find, budget - 1, loop_node_, not_at_start); | 1555 return EatsAtLeastHelper(still_to_find, budget - 1, loop_node_, not_at_start); |
1618 } | 1556 } |
1619 | 1557 |
1620 | |
1621 intptr_t ChoiceNode::EatsAtLeast(intptr_t still_to_find, | 1558 intptr_t ChoiceNode::EatsAtLeast(intptr_t still_to_find, |
1622 intptr_t budget, | 1559 intptr_t budget, |
1623 bool not_at_start) { | 1560 bool not_at_start) { |
1624 return EatsAtLeastHelper(still_to_find, budget, NULL, not_at_start); | 1561 return EatsAtLeastHelper(still_to_find, budget, NULL, not_at_start); |
1625 } | 1562 } |
1626 | 1563 |
1627 | |
1628 // Takes the left-most 1-bit and smears it out, setting all bits to its right. | 1564 // Takes the left-most 1-bit and smears it out, setting all bits to its right. |
1629 static inline uint32_t SmearBitsRight(uint32_t v) { | 1565 static inline uint32_t SmearBitsRight(uint32_t v) { |
1630 v |= v >> 1; | 1566 v |= v >> 1; |
1631 v |= v >> 2; | 1567 v |= v >> 2; |
1632 v |= v >> 4; | 1568 v |= v >> 4; |
1633 v |= v >> 8; | 1569 v |= v >> 8; |
1634 v |= v >> 16; | 1570 v |= v >> 16; |
1635 return v; | 1571 return v; |
1636 } | 1572 } |
1637 | 1573 |
1638 | |
1639 bool QuickCheckDetails::Rationalize(bool asc) { | 1574 bool QuickCheckDetails::Rationalize(bool asc) { |
1640 bool found_useful_op = false; | 1575 bool found_useful_op = false; |
1641 uint32_t char_mask; | 1576 uint32_t char_mask; |
1642 if (asc) { | 1577 if (asc) { |
1643 char_mask = Symbols::kMaxOneCharCodeSymbol; | 1578 char_mask = Symbols::kMaxOneCharCodeSymbol; |
1644 } else { | 1579 } else { |
1645 char_mask = Utf16::kMaxCodeUnit; | 1580 char_mask = Utf16::kMaxCodeUnit; |
1646 } | 1581 } |
1647 mask_ = 0; | 1582 mask_ = 0; |
1648 value_ = 0; | 1583 value_ = 0; |
1649 intptr_t char_shift = 0; | 1584 intptr_t char_shift = 0; |
1650 for (intptr_t i = 0; i < characters_; i++) { | 1585 for (intptr_t i = 0; i < characters_; i++) { |
1651 Position* pos = &positions_[i]; | 1586 Position* pos = &positions_[i]; |
1652 if ((pos->mask & Symbols::kMaxOneCharCodeSymbol) != 0) { | 1587 if ((pos->mask & Symbols::kMaxOneCharCodeSymbol) != 0) { |
1653 found_useful_op = true; | 1588 found_useful_op = true; |
1654 } | 1589 } |
1655 mask_ |= (pos->mask & char_mask) << char_shift; | 1590 mask_ |= (pos->mask & char_mask) << char_shift; |
1656 value_ |= (pos->value & char_mask) << char_shift; | 1591 value_ |= (pos->value & char_mask) << char_shift; |
1657 char_shift += asc ? 8 : 16; | 1592 char_shift += asc ? 8 : 16; |
1658 } | 1593 } |
1659 return found_useful_op; | 1594 return found_useful_op; |
1660 } | 1595 } |
1661 | 1596 |
1662 | |
1663 bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler, | 1597 bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler, |
1664 Trace* bounds_check_trace, | 1598 Trace* bounds_check_trace, |
1665 Trace* trace, | 1599 Trace* trace, |
1666 bool preload_has_checked_bounds, | 1600 bool preload_has_checked_bounds, |
1667 BlockLabel* on_possible_success, | 1601 BlockLabel* on_possible_success, |
1668 QuickCheckDetails* details, | 1602 QuickCheckDetails* details, |
1669 bool fall_through_on_failure) { | 1603 bool fall_through_on_failure) { |
1670 if (details->characters() == 0) return false; | 1604 if (details->characters() == 0) return false; |
1671 GetQuickCheckDetails(details, compiler, 0, | 1605 GetQuickCheckDetails(details, compiler, 0, |
1672 trace->at_start() == Trace::FALSE_VALUE); | 1606 trace->at_start() == Trace::FALSE_VALUE); |
(...skipping 10 matching lines...) Expand all Loading... |
1683 ASSERT(trace->cp_offset() == bounds_check_trace->cp_offset()); | 1617 ASSERT(trace->cp_offset() == bounds_check_trace->cp_offset()); |
1684 // We are attempting to preload the minimum number of characters | 1618 // We are attempting to preload the minimum number of characters |
1685 // any choice would eat, so if the bounds check fails, then none of the | 1619 // any choice would eat, so if the bounds check fails, then none of the |
1686 // choices can succeed, so we can just immediately backtrack, rather | 1620 // choices can succeed, so we can just immediately backtrack, rather |
1687 // than go to the next choice. | 1621 // than go to the next choice. |
1688 assembler->LoadCurrentCharacter( | 1622 assembler->LoadCurrentCharacter( |
1689 trace->cp_offset(), bounds_check_trace->backtrack(), | 1623 trace->cp_offset(), bounds_check_trace->backtrack(), |
1690 !preload_has_checked_bounds, details->characters()); | 1624 !preload_has_checked_bounds, details->characters()); |
1691 } | 1625 } |
1692 | 1626 |
1693 | |
1694 bool need_mask = true; | 1627 bool need_mask = true; |
1695 | 1628 |
1696 if (details->characters() == 1) { | 1629 if (details->characters() == 1) { |
1697 // If number of characters preloaded is 1 then we used a byte or 16 bit | 1630 // If number of characters preloaded is 1 then we used a byte or 16 bit |
1698 // load so the value is already masked down. | 1631 // load so the value is already masked down. |
1699 uint32_t char_mask; | 1632 uint32_t char_mask; |
1700 if (compiler->one_byte()) { | 1633 if (compiler->one_byte()) { |
1701 char_mask = Symbols::kMaxOneCharCodeSymbol; | 1634 char_mask = Symbols::kMaxOneCharCodeSymbol; |
1702 } else { | 1635 } else { |
1703 char_mask = Utf16::kMaxCodeUnit; | 1636 char_mask = Utf16::kMaxCodeUnit; |
(...skipping 21 matching lines...) Expand all Loading... |
1725 } else { | 1658 } else { |
1726 if (need_mask) { | 1659 if (need_mask) { |
1727 assembler->CheckNotCharacterAfterAnd(value, mask, trace->backtrack()); | 1660 assembler->CheckNotCharacterAfterAnd(value, mask, trace->backtrack()); |
1728 } else { | 1661 } else { |
1729 assembler->CheckNotCharacter(value, trace->backtrack()); | 1662 assembler->CheckNotCharacter(value, trace->backtrack()); |
1730 } | 1663 } |
1731 } | 1664 } |
1732 return true; | 1665 return true; |
1733 } | 1666 } |
1734 | 1667 |
1735 | |
1736 // Here is the meat of GetQuickCheckDetails (see also the comment on the | 1668 // Here is the meat of GetQuickCheckDetails (see also the comment on the |
1737 // super-class in the .h file). | 1669 // super-class in the .h file). |
1738 // | 1670 // |
1739 // We iterate along the text object, building up for each character a | 1671 // We iterate along the text object, building up for each character a |
1740 // mask and value that can be used to test for a quick failure to match. | 1672 // mask and value that can be used to test for a quick failure to match. |
1741 // The masks and values for the positions will be combined into a single | 1673 // The masks and values for the positions will be combined into a single |
1742 // machine word for the current character width in order to be used in | 1674 // machine word for the current character width in order to be used in |
1743 // generating a quick check. | 1675 // generating a quick check. |
1744 void TextNode::GetQuickCheckDetails(QuickCheckDetails* details, | 1676 void TextNode::GetQuickCheckDetails(QuickCheckDetails* details, |
1745 RegExpCompiler* compiler, | 1677 RegExpCompiler* compiler, |
(...skipping 143 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1889 } | 1821 } |
1890 } | 1822 } |
1891 } | 1823 } |
1892 ASSERT(characters_filled_in != details->characters()); | 1824 ASSERT(characters_filled_in != details->characters()); |
1893 if (!details->cannot_match()) { | 1825 if (!details->cannot_match()) { |
1894 on_success()->GetQuickCheckDetails(details, compiler, characters_filled_in, | 1826 on_success()->GetQuickCheckDetails(details, compiler, characters_filled_in, |
1895 true); | 1827 true); |
1896 } | 1828 } |
1897 } | 1829 } |
1898 | 1830 |
1899 | |
1900 void QuickCheckDetails::Clear() { | 1831 void QuickCheckDetails::Clear() { |
1901 for (int i = 0; i < characters_; i++) { | 1832 for (int i = 0; i < characters_; i++) { |
1902 positions_[i].mask = 0; | 1833 positions_[i].mask = 0; |
1903 positions_[i].value = 0; | 1834 positions_[i].value = 0; |
1904 positions_[i].determines_perfectly = false; | 1835 positions_[i].determines_perfectly = false; |
1905 } | 1836 } |
1906 characters_ = 0; | 1837 characters_ = 0; |
1907 } | 1838 } |
1908 | 1839 |
1909 | |
1910 void QuickCheckDetails::Advance(intptr_t by, bool one_byte) { | 1840 void QuickCheckDetails::Advance(intptr_t by, bool one_byte) { |
1911 ASSERT(by >= 0); | 1841 ASSERT(by >= 0); |
1912 if (by >= characters_) { | 1842 if (by >= characters_) { |
1913 Clear(); | 1843 Clear(); |
1914 return; | 1844 return; |
1915 } | 1845 } |
1916 for (intptr_t i = 0; i < characters_ - by; i++) { | 1846 for (intptr_t i = 0; i < characters_ - by; i++) { |
1917 positions_[i] = positions_[by + i]; | 1847 positions_[i] = positions_[by + i]; |
1918 } | 1848 } |
1919 for (intptr_t i = characters_ - by; i < characters_; i++) { | 1849 for (intptr_t i = characters_ - by; i < characters_; i++) { |
1920 positions_[i].mask = 0; | 1850 positions_[i].mask = 0; |
1921 positions_[i].value = 0; | 1851 positions_[i].value = 0; |
1922 positions_[i].determines_perfectly = false; | 1852 positions_[i].determines_perfectly = false; |
1923 } | 1853 } |
1924 characters_ -= by; | 1854 characters_ -= by; |
1925 // We could change mask_ and value_ here but we would never advance unless | 1855 // We could change mask_ and value_ here but we would never advance unless |
1926 // they had already been used in a check and they won't be used again because | 1856 // they had already been used in a check and they won't be used again because |
1927 // it would gain us nothing. So there's no point. | 1857 // it would gain us nothing. So there's no point. |
1928 } | 1858 } |
1929 | 1859 |
1930 | |
1931 void QuickCheckDetails::Merge(QuickCheckDetails* other, intptr_t from_index) { | 1860 void QuickCheckDetails::Merge(QuickCheckDetails* other, intptr_t from_index) { |
1932 ASSERT(characters_ == other->characters_); | 1861 ASSERT(characters_ == other->characters_); |
1933 if (other->cannot_match_) { | 1862 if (other->cannot_match_) { |
1934 return; | 1863 return; |
1935 } | 1864 } |
1936 if (cannot_match_) { | 1865 if (cannot_match_) { |
1937 *this = *other; | 1866 *this = *other; |
1938 return; | 1867 return; |
1939 } | 1868 } |
1940 for (intptr_t i = from_index; i < characters_; i++) { | 1869 for (intptr_t i = from_index; i < characters_; i++) { |
1941 QuickCheckDetails::Position* pos = positions(i); | 1870 QuickCheckDetails::Position* pos = positions(i); |
1942 QuickCheckDetails::Position* other_pos = other->positions(i); | 1871 QuickCheckDetails::Position* other_pos = other->positions(i); |
1943 if (pos->mask != other_pos->mask || pos->value != other_pos->value || | 1872 if (pos->mask != other_pos->mask || pos->value != other_pos->value || |
1944 !other_pos->determines_perfectly) { | 1873 !other_pos->determines_perfectly) { |
1945 // Our mask-compare operation will be approximate unless we have the | 1874 // Our mask-compare operation will be approximate unless we have the |
1946 // exact same operation on both sides of the alternation. | 1875 // exact same operation on both sides of the alternation. |
1947 pos->determines_perfectly = false; | 1876 pos->determines_perfectly = false; |
1948 } | 1877 } |
1949 pos->mask &= other_pos->mask; | 1878 pos->mask &= other_pos->mask; |
1950 pos->value &= pos->mask; | 1879 pos->value &= pos->mask; |
1951 other_pos->value &= pos->mask; | 1880 other_pos->value &= pos->mask; |
1952 uint16_t differing_bits = (pos->value ^ other_pos->value); | 1881 uint16_t differing_bits = (pos->value ^ other_pos->value); |
1953 pos->mask &= ~differing_bits; | 1882 pos->mask &= ~differing_bits; |
1954 pos->value &= pos->mask; | 1883 pos->value &= pos->mask; |
1955 } | 1884 } |
1956 } | 1885 } |
1957 | 1886 |
1958 | |
1959 class VisitMarker : public ValueObject { | 1887 class VisitMarker : public ValueObject { |
1960 public: | 1888 public: |
1961 explicit VisitMarker(NodeInfo* info) : info_(info) { | 1889 explicit VisitMarker(NodeInfo* info) : info_(info) { |
1962 ASSERT(!info->visited); | 1890 ASSERT(!info->visited); |
1963 info->visited = true; | 1891 info->visited = true; |
1964 } | 1892 } |
1965 ~VisitMarker() { info_->visited = false; } | 1893 ~VisitMarker() { info_->visited = false; } |
1966 | 1894 |
1967 private: | 1895 private: |
1968 NodeInfo* info_; | 1896 NodeInfo* info_; |
1969 }; | 1897 }; |
1970 | 1898 |
1971 | |
1972 RegExpNode* SeqRegExpNode::FilterOneByte(intptr_t depth, bool ignore_case) { | 1899 RegExpNode* SeqRegExpNode::FilterOneByte(intptr_t depth, bool ignore_case) { |
1973 if (info()->replacement_calculated) return replacement(); | 1900 if (info()->replacement_calculated) return replacement(); |
1974 if (depth < 0) return this; | 1901 if (depth < 0) return this; |
1975 ASSERT(!info()->visited); | 1902 ASSERT(!info()->visited); |
1976 VisitMarker marker(info()); | 1903 VisitMarker marker(info()); |
1977 return FilterSuccessor(depth - 1, ignore_case); | 1904 return FilterSuccessor(depth - 1, ignore_case); |
1978 } | 1905 } |
1979 | 1906 |
1980 | |
1981 RegExpNode* SeqRegExpNode::FilterSuccessor(intptr_t depth, bool ignore_case) { | 1907 RegExpNode* SeqRegExpNode::FilterSuccessor(intptr_t depth, bool ignore_case) { |
1982 RegExpNode* next = on_success_->FilterOneByte(depth - 1, ignore_case); | 1908 RegExpNode* next = on_success_->FilterOneByte(depth - 1, ignore_case); |
1983 if (next == NULL) return set_replacement(NULL); | 1909 if (next == NULL) return set_replacement(NULL); |
1984 on_success_ = next; | 1910 on_success_ = next; |
1985 return set_replacement(this); | 1911 return set_replacement(this); |
1986 } | 1912 } |
1987 | 1913 |
1988 | |
1989 // We need to check for the following characters: 0x39c 0x3bc 0x178. | 1914 // We need to check for the following characters: 0x39c 0x3bc 0x178. |
1990 static inline bool RangeContainsLatin1Equivalents(CharacterRange range) { | 1915 static inline bool RangeContainsLatin1Equivalents(CharacterRange range) { |
1991 // TODO(dcarney): this could be a lot more efficient. | 1916 // TODO(dcarney): this could be a lot more efficient. |
1992 return range.Contains(0x39c) || range.Contains(0x3bc) || | 1917 return range.Contains(0x39c) || range.Contains(0x3bc) || |
1993 range.Contains(0x178); | 1918 range.Contains(0x178); |
1994 } | 1919 } |
1995 | 1920 |
1996 | |
1997 static bool RangesContainLatin1Equivalents( | 1921 static bool RangesContainLatin1Equivalents( |
1998 ZoneGrowableArray<CharacterRange>* ranges) { | 1922 ZoneGrowableArray<CharacterRange>* ranges) { |
1999 for (intptr_t i = 0; i < ranges->length(); i++) { | 1923 for (intptr_t i = 0; i < ranges->length(); i++) { |
2000 // TODO(dcarney): this could be a lot more efficient. | 1924 // TODO(dcarney): this could be a lot more efficient. |
2001 if (RangeContainsLatin1Equivalents(ranges->At(i))) return true; | 1925 if (RangeContainsLatin1Equivalents(ranges->At(i))) return true; |
2002 } | 1926 } |
2003 return false; | 1927 return false; |
2004 } | 1928 } |
2005 | 1929 |
2006 | |
2007 static uint16_t ConvertNonLatin1ToLatin1(uint16_t c) { | 1930 static uint16_t ConvertNonLatin1ToLatin1(uint16_t c) { |
2008 ASSERT(c > Symbols::kMaxOneCharCodeSymbol); | 1931 ASSERT(c > Symbols::kMaxOneCharCodeSymbol); |
2009 switch (c) { | 1932 switch (c) { |
2010 // This are equivalent characters in unicode. | 1933 // This are equivalent characters in unicode. |
2011 case 0x39c: | 1934 case 0x39c: |
2012 case 0x3bc: | 1935 case 0x3bc: |
2013 return 0xb5; | 1936 return 0xb5; |
2014 // This is an uppercase of a Latin-1 character | 1937 // This is an uppercase of a Latin-1 character |
2015 // outside of Latin-1. | 1938 // outside of Latin-1. |
2016 case 0x178: | 1939 case 0x178: |
2017 return 0xff; | 1940 return 0xff; |
2018 } | 1941 } |
2019 return 0; | 1942 return 0; |
2020 } | 1943 } |
2021 | 1944 |
2022 | |
2023 RegExpNode* TextNode::FilterOneByte(intptr_t depth, bool ignore_case) { | 1945 RegExpNode* TextNode::FilterOneByte(intptr_t depth, bool ignore_case) { |
2024 if (info()->replacement_calculated) return replacement(); | 1946 if (info()->replacement_calculated) return replacement(); |
2025 if (depth < 0) return this; | 1947 if (depth < 0) return this; |
2026 ASSERT(!info()->visited); | 1948 ASSERT(!info()->visited); |
2027 VisitMarker marker(info()); | 1949 VisitMarker marker(info()); |
2028 intptr_t element_count = elms_->length(); | 1950 intptr_t element_count = elms_->length(); |
2029 for (intptr_t i = 0; i < element_count; i++) { | 1951 for (intptr_t i = 0; i < element_count; i++) { |
2030 TextElement elm = elms_->At(i); | 1952 TextElement elm = elms_->At(i); |
2031 if (elm.text_type() == TextElement::ATOM) { | 1953 if (elm.text_type() == TextElement::ATOM) { |
2032 ZoneGrowableArray<uint16_t>* quarks = elm.atom()->data(); | 1954 ZoneGrowableArray<uint16_t>* quarks = elm.atom()->data(); |
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2064 // This will be handled in a later filter. | 1986 // This will be handled in a later filter. |
2065 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue; | 1987 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue; |
2066 return set_replacement(NULL); | 1988 return set_replacement(NULL); |
2067 } | 1989 } |
2068 } | 1990 } |
2069 } | 1991 } |
2070 } | 1992 } |
2071 return FilterSuccessor(depth - 1, ignore_case); | 1993 return FilterSuccessor(depth - 1, ignore_case); |
2072 } | 1994 } |
2073 | 1995 |
2074 | |
2075 RegExpNode* LoopChoiceNode::FilterOneByte(intptr_t depth, bool ignore_case) { | 1996 RegExpNode* LoopChoiceNode::FilterOneByte(intptr_t depth, bool ignore_case) { |
2076 if (info()->replacement_calculated) return replacement(); | 1997 if (info()->replacement_calculated) return replacement(); |
2077 if (depth < 0) return this; | 1998 if (depth < 0) return this; |
2078 if (info()->visited) return this; | 1999 if (info()->visited) return this; |
2079 { | 2000 { |
2080 VisitMarker marker(info()); | 2001 VisitMarker marker(info()); |
2081 | 2002 |
2082 RegExpNode* continue_replacement = | 2003 RegExpNode* continue_replacement = |
2083 continue_node_->FilterOneByte(depth - 1, ignore_case); | 2004 continue_node_->FilterOneByte(depth - 1, ignore_case); |
2084 // If we can't continue after the loop then there is no sense in doing the | 2005 // If we can't continue after the loop then there is no sense in doing the |
2085 // loop. | 2006 // loop. |
2086 if (continue_replacement == NULL) return set_replacement(NULL); | 2007 if (continue_replacement == NULL) return set_replacement(NULL); |
2087 } | 2008 } |
2088 | 2009 |
2089 return ChoiceNode::FilterOneByte(depth - 1, ignore_case); | 2010 return ChoiceNode::FilterOneByte(depth - 1, ignore_case); |
2090 } | 2011 } |
2091 | 2012 |
2092 | |
2093 RegExpNode* ChoiceNode::FilterOneByte(intptr_t depth, bool ignore_case) { | 2013 RegExpNode* ChoiceNode::FilterOneByte(intptr_t depth, bool ignore_case) { |
2094 if (info()->replacement_calculated) return replacement(); | 2014 if (info()->replacement_calculated) return replacement(); |
2095 if (depth < 0) return this; | 2015 if (depth < 0) return this; |
2096 if (info()->visited) return this; | 2016 if (info()->visited) return this; |
2097 VisitMarker marker(info()); | 2017 VisitMarker marker(info()); |
2098 intptr_t choice_count = alternatives_->length(); | 2018 intptr_t choice_count = alternatives_->length(); |
2099 | 2019 |
2100 for (intptr_t i = 0; i < choice_count; i++) { | 2020 for (intptr_t i = 0; i < choice_count; i++) { |
2101 GuardedAlternative alternative = alternatives_->At(i); | 2021 GuardedAlternative alternative = alternatives_->At(i); |
2102 if (alternative.guards() != NULL && alternative.guards()->length() != 0) { | 2022 if (alternative.guards() != NULL && alternative.guards()->length() != 0) { |
(...skipping 30 matching lines...) Expand all Loading... |
2133 (*alternatives_)[i].node()->FilterOneByte(depth - 1, ignore_case); | 2053 (*alternatives_)[i].node()->FilterOneByte(depth - 1, ignore_case); |
2134 if (replacement != NULL) { | 2054 if (replacement != NULL) { |
2135 (*alternatives_)[i].set_node(replacement); | 2055 (*alternatives_)[i].set_node(replacement); |
2136 new_alternatives->Add((*alternatives_)[i]); | 2056 new_alternatives->Add((*alternatives_)[i]); |
2137 } | 2057 } |
2138 } | 2058 } |
2139 alternatives_ = new_alternatives; | 2059 alternatives_ = new_alternatives; |
2140 return this; | 2060 return this; |
2141 } | 2061 } |
2142 | 2062 |
2143 | |
2144 RegExpNode* NegativeLookaheadChoiceNode::FilterOneByte(intptr_t depth, | 2063 RegExpNode* NegativeLookaheadChoiceNode::FilterOneByte(intptr_t depth, |
2145 bool ignore_case) { | 2064 bool ignore_case) { |
2146 if (info()->replacement_calculated) return replacement(); | 2065 if (info()->replacement_calculated) return replacement(); |
2147 if (depth < 0) return this; | 2066 if (depth < 0) return this; |
2148 if (info()->visited) return this; | 2067 if (info()->visited) return this; |
2149 VisitMarker marker(info()); | 2068 VisitMarker marker(info()); |
2150 // Alternative 0 is the negative lookahead, alternative 1 is what comes | 2069 // Alternative 0 is the negative lookahead, alternative 1 is what comes |
2151 // afterwards. | 2070 // afterwards. |
2152 RegExpNode* node = (*alternatives_)[1].node(); | 2071 RegExpNode* node = (*alternatives_)[1].node(); |
2153 RegExpNode* replacement = node->FilterOneByte(depth - 1, ignore_case); | 2072 RegExpNode* replacement = node->FilterOneByte(depth - 1, ignore_case); |
2154 if (replacement == NULL) return set_replacement(NULL); | 2073 if (replacement == NULL) return set_replacement(NULL); |
2155 (*alternatives_)[1].set_node(replacement); | 2074 (*alternatives_)[1].set_node(replacement); |
2156 | 2075 |
2157 RegExpNode* neg_node = (*alternatives_)[0].node(); | 2076 RegExpNode* neg_node = (*alternatives_)[0].node(); |
2158 RegExpNode* neg_replacement = neg_node->FilterOneByte(depth - 1, ignore_case); | 2077 RegExpNode* neg_replacement = neg_node->FilterOneByte(depth - 1, ignore_case); |
2159 // If the negative lookahead is always going to fail then | 2078 // If the negative lookahead is always going to fail then |
2160 // we don't need to check it. | 2079 // we don't need to check it. |
2161 if (neg_replacement == NULL) return set_replacement(replacement); | 2080 if (neg_replacement == NULL) return set_replacement(replacement); |
2162 (*alternatives_)[0].set_node(neg_replacement); | 2081 (*alternatives_)[0].set_node(neg_replacement); |
2163 return set_replacement(this); | 2082 return set_replacement(this); |
2164 } | 2083 } |
2165 | 2084 |
2166 | |
2167 void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, | 2085 void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, |
2168 RegExpCompiler* compiler, | 2086 RegExpCompiler* compiler, |
2169 intptr_t characters_filled_in, | 2087 intptr_t characters_filled_in, |
2170 bool not_at_start) { | 2088 bool not_at_start) { |
2171 if (body_can_be_zero_length_ || info()->visited) return; | 2089 if (body_can_be_zero_length_ || info()->visited) return; |
2172 VisitMarker marker(info()); | 2090 VisitMarker marker(info()); |
2173 return ChoiceNode::GetQuickCheckDetails(details, compiler, | 2091 return ChoiceNode::GetQuickCheckDetails(details, compiler, |
2174 characters_filled_in, not_at_start); | 2092 characters_filled_in, not_at_start); |
2175 } | 2093 } |
2176 | 2094 |
2177 | |
2178 void LoopChoiceNode::FillInBMInfo(intptr_t offset, | 2095 void LoopChoiceNode::FillInBMInfo(intptr_t offset, |
2179 intptr_t budget, | 2096 intptr_t budget, |
2180 BoyerMooreLookahead* bm, | 2097 BoyerMooreLookahead* bm, |
2181 bool not_at_start) { | 2098 bool not_at_start) { |
2182 if (body_can_be_zero_length_ || budget <= 0) { | 2099 if (body_can_be_zero_length_ || budget <= 0) { |
2183 bm->SetRest(offset); | 2100 bm->SetRest(offset); |
2184 SaveBMInfo(bm, not_at_start, offset); | 2101 SaveBMInfo(bm, not_at_start, offset); |
2185 return; | 2102 return; |
2186 } | 2103 } |
2187 ChoiceNode::FillInBMInfo(offset, budget - 1, bm, not_at_start); | 2104 ChoiceNode::FillInBMInfo(offset, budget - 1, bm, not_at_start); |
2188 SaveBMInfo(bm, not_at_start, offset); | 2105 SaveBMInfo(bm, not_at_start, offset); |
2189 } | 2106 } |
2190 | 2107 |
2191 | |
2192 void ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, | 2108 void ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, |
2193 RegExpCompiler* compiler, | 2109 RegExpCompiler* compiler, |
2194 intptr_t characters_filled_in, | 2110 intptr_t characters_filled_in, |
2195 bool not_at_start) { | 2111 bool not_at_start) { |
2196 not_at_start = (not_at_start || not_at_start_); | 2112 not_at_start = (not_at_start || not_at_start_); |
2197 intptr_t choice_count = alternatives_->length(); | 2113 intptr_t choice_count = alternatives_->length(); |
2198 ASSERT(choice_count > 0); | 2114 ASSERT(choice_count > 0); |
2199 (*alternatives_)[0].node()->GetQuickCheckDetails( | 2115 (*alternatives_)[0].node()->GetQuickCheckDetails( |
2200 details, compiler, characters_filled_in, not_at_start); | 2116 details, compiler, characters_filled_in, not_at_start); |
2201 for (intptr_t i = 1; i < choice_count; i++) { | 2117 for (intptr_t i = 1; i < choice_count; i++) { |
2202 QuickCheckDetails new_details(details->characters()); | 2118 QuickCheckDetails new_details(details->characters()); |
2203 RegExpNode* node = (*alternatives_)[i].node(); | 2119 RegExpNode* node = (*alternatives_)[i].node(); |
2204 node->GetQuickCheckDetails(&new_details, compiler, characters_filled_in, | 2120 node->GetQuickCheckDetails(&new_details, compiler, characters_filled_in, |
2205 not_at_start); | 2121 not_at_start); |
2206 // Here we merge the quick match details of the two branches. | 2122 // Here we merge the quick match details of the two branches. |
2207 details->Merge(&new_details, characters_filled_in); | 2123 details->Merge(&new_details, characters_filled_in); |
2208 } | 2124 } |
2209 } | 2125 } |
2210 | 2126 |
2211 | |
2212 // Check for [0-9A-Z_a-z]. | 2127 // Check for [0-9A-Z_a-z]. |
2213 static void EmitWordCheck(RegExpMacroAssembler* assembler, | 2128 static void EmitWordCheck(RegExpMacroAssembler* assembler, |
2214 BlockLabel* word, | 2129 BlockLabel* word, |
2215 BlockLabel* non_word, | 2130 BlockLabel* non_word, |
2216 bool fall_through_on_word) { | 2131 bool fall_through_on_word) { |
2217 if (assembler->CheckSpecialCharacterClass( | 2132 if (assembler->CheckSpecialCharacterClass( |
2218 fall_through_on_word ? 'w' : 'W', | 2133 fall_through_on_word ? 'w' : 'W', |
2219 fall_through_on_word ? non_word : word)) { | 2134 fall_through_on_word ? non_word : word)) { |
2220 // Optimized implementation available. | 2135 // Optimized implementation available. |
2221 return; | 2136 return; |
2222 } | 2137 } |
2223 assembler->CheckCharacterGT('z', non_word); | 2138 assembler->CheckCharacterGT('z', non_word); |
2224 assembler->CheckCharacterLT('0', non_word); | 2139 assembler->CheckCharacterLT('0', non_word); |
2225 assembler->CheckCharacterGT('a' - 1, word); | 2140 assembler->CheckCharacterGT('a' - 1, word); |
2226 assembler->CheckCharacterLT('9' + 1, word); | 2141 assembler->CheckCharacterLT('9' + 1, word); |
2227 assembler->CheckCharacterLT('A', non_word); | 2142 assembler->CheckCharacterLT('A', non_word); |
2228 assembler->CheckCharacterLT('Z' + 1, word); | 2143 assembler->CheckCharacterLT('Z' + 1, word); |
2229 if (fall_through_on_word) { | 2144 if (fall_through_on_word) { |
2230 assembler->CheckNotCharacter('_', non_word); | 2145 assembler->CheckNotCharacter('_', non_word); |
2231 } else { | 2146 } else { |
2232 assembler->CheckCharacter('_', word); | 2147 assembler->CheckCharacter('_', word); |
2233 } | 2148 } |
2234 } | 2149 } |
2235 | 2150 |
2236 | |
2237 // Emit the code to check for a ^ in multiline mode (1-character lookbehind | 2151 // Emit the code to check for a ^ in multiline mode (1-character lookbehind |
2238 // that matches newline or the start of input). | 2152 // that matches newline or the start of input). |
2239 static void EmitHat(RegExpCompiler* compiler, | 2153 static void EmitHat(RegExpCompiler* compiler, |
2240 RegExpNode* on_success, | 2154 RegExpNode* on_success, |
2241 Trace* trace) { | 2155 Trace* trace) { |
2242 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 2156 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
2243 // We will be loading the previous character into the current character | 2157 // We will be loading the previous character into the current character |
2244 // register. | 2158 // register. |
2245 Trace new_trace(*trace); | 2159 Trace new_trace(*trace); |
2246 new_trace.InvalidateCurrentCharacter(); | 2160 new_trace.InvalidateCurrentCharacter(); |
(...skipping 13 matching lines...) Expand all Loading... |
2260 if (!compiler->one_byte()) { | 2174 if (!compiler->one_byte()) { |
2261 assembler->CheckCharacterAfterAnd(0x2028, 0xfffe, &ok); | 2175 assembler->CheckCharacterAfterAnd(0x2028, 0xfffe, &ok); |
2262 } | 2176 } |
2263 assembler->CheckCharacter('\n', &ok); | 2177 assembler->CheckCharacter('\n', &ok); |
2264 assembler->CheckNotCharacter('\r', new_trace.backtrack()); | 2178 assembler->CheckNotCharacter('\r', new_trace.backtrack()); |
2265 } | 2179 } |
2266 assembler->BindBlock(&ok); | 2180 assembler->BindBlock(&ok); |
2267 on_success->Emit(compiler, &new_trace); | 2181 on_success->Emit(compiler, &new_trace); |
2268 } | 2182 } |
2269 | 2183 |
2270 | |
2271 // Emit the code to handle \b and \B (word-boundary or non-word-boundary). | 2184 // Emit the code to handle \b and \B (word-boundary or non-word-boundary). |
2272 void AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) { | 2185 void AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) { |
2273 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 2186 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
2274 Trace::TriBool next_is_word_character = Trace::UNKNOWN; | 2187 Trace::TriBool next_is_word_character = Trace::UNKNOWN; |
2275 bool not_at_start = (trace->at_start() == Trace::FALSE_VALUE); | 2188 bool not_at_start = (trace->at_start() == Trace::FALSE_VALUE); |
2276 BoyerMooreLookahead* lookahead = bm_info(not_at_start); | 2189 BoyerMooreLookahead* lookahead = bm_info(not_at_start); |
2277 if (lookahead == NULL) { | 2190 if (lookahead == NULL) { |
2278 intptr_t eats_at_least = | 2191 intptr_t eats_at_least = |
2279 Utils::Minimum(kMaxLookaheadForBoyerMoore, | 2192 Utils::Minimum(kMaxLookaheadForBoyerMoore, |
2280 EatsAtLeast(kMaxLookaheadForBoyerMoore, kRecursionBudget, | 2193 EatsAtLeast(kMaxLookaheadForBoyerMoore, kRecursionBudget, |
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2315 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord); | 2228 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord); |
2316 assembler->BindBlock(&ok); | 2229 assembler->BindBlock(&ok); |
2317 } else if (next_is_word_character == Trace::TRUE_VALUE) { | 2230 } else if (next_is_word_character == Trace::TRUE_VALUE) { |
2318 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord); | 2231 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord); |
2319 } else { | 2232 } else { |
2320 ASSERT(next_is_word_character == Trace::FALSE_VALUE); | 2233 ASSERT(next_is_word_character == Trace::FALSE_VALUE); |
2321 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord); | 2234 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord); |
2322 } | 2235 } |
2323 } | 2236 } |
2324 | 2237 |
2325 | |
2326 void AssertionNode::BacktrackIfPrevious( | 2238 void AssertionNode::BacktrackIfPrevious( |
2327 RegExpCompiler* compiler, | 2239 RegExpCompiler* compiler, |
2328 Trace* trace, | 2240 Trace* trace, |
2329 AssertionNode::IfPrevious backtrack_if_previous) { | 2241 AssertionNode::IfPrevious backtrack_if_previous) { |
2330 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 2242 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
2331 Trace new_trace(*trace); | 2243 Trace new_trace(*trace); |
2332 new_trace.InvalidateCurrentCharacter(); | 2244 new_trace.InvalidateCurrentCharacter(); |
2333 | 2245 |
2334 BlockLabel fall_through, dummy; | 2246 BlockLabel fall_through, dummy; |
2335 | 2247 |
(...skipping 11 matching lines...) Expand all Loading... |
2347 } | 2259 } |
2348 // We already checked that we are not at the start of input so it must be | 2260 // We already checked that we are not at the start of input so it must be |
2349 // OK to load the previous character. | 2261 // OK to load the previous character. |
2350 assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, &dummy, false); | 2262 assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, &dummy, false); |
2351 EmitWordCheck(assembler, word, non_word, backtrack_if_previous == kIsNonWord); | 2263 EmitWordCheck(assembler, word, non_word, backtrack_if_previous == kIsNonWord); |
2352 | 2264 |
2353 assembler->BindBlock(&fall_through); | 2265 assembler->BindBlock(&fall_through); |
2354 on_success()->Emit(compiler, &new_trace); | 2266 on_success()->Emit(compiler, &new_trace); |
2355 } | 2267 } |
2356 | 2268 |
2357 | |
2358 void AssertionNode::GetQuickCheckDetails(QuickCheckDetails* details, | 2269 void AssertionNode::GetQuickCheckDetails(QuickCheckDetails* details, |
2359 RegExpCompiler* compiler, | 2270 RegExpCompiler* compiler, |
2360 intptr_t filled_in, | 2271 intptr_t filled_in, |
2361 bool not_at_start) { | 2272 bool not_at_start) { |
2362 if (assertion_type_ == AT_START && not_at_start) { | 2273 if (assertion_type_ == AT_START && not_at_start) { |
2363 details->set_cannot_match(); | 2274 details->set_cannot_match(); |
2364 return; | 2275 return; |
2365 } | 2276 } |
2366 return on_success()->GetQuickCheckDetails(details, compiler, filled_in, | 2277 return on_success()->GetQuickCheckDetails(details, compiler, filled_in, |
2367 not_at_start); | 2278 not_at_start); |
2368 } | 2279 } |
2369 | 2280 |
2370 | |
2371 void AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace) { | 2281 void AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace) { |
2372 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 2282 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
2373 switch (assertion_type_) { | 2283 switch (assertion_type_) { |
2374 case AT_END: { | 2284 case AT_END: { |
2375 BlockLabel ok; | 2285 BlockLabel ok; |
2376 assembler->CheckPosition(trace->cp_offset(), &ok); | 2286 assembler->CheckPosition(trace->cp_offset(), &ok); |
2377 assembler->GoTo(trace->backtrack()); | 2287 assembler->GoTo(trace->backtrack()); |
2378 assembler->BindBlock(&ok); | 2288 assembler->BindBlock(&ok); |
2379 break; | 2289 break; |
2380 } | 2290 } |
(...skipping 15 matching lines...) Expand all Loading... |
2396 return; | 2306 return; |
2397 case AT_BOUNDARY: | 2307 case AT_BOUNDARY: |
2398 case AT_NON_BOUNDARY: { | 2308 case AT_NON_BOUNDARY: { |
2399 EmitBoundaryCheck(compiler, trace); | 2309 EmitBoundaryCheck(compiler, trace); |
2400 return; | 2310 return; |
2401 } | 2311 } |
2402 } | 2312 } |
2403 on_success()->Emit(compiler, trace); | 2313 on_success()->Emit(compiler, trace); |
2404 } | 2314 } |
2405 | 2315 |
2406 | |
2407 static bool DeterminedAlready(QuickCheckDetails* quick_check, intptr_t offset) { | 2316 static bool DeterminedAlready(QuickCheckDetails* quick_check, intptr_t offset) { |
2408 if (quick_check == NULL) return false; | 2317 if (quick_check == NULL) return false; |
2409 if (offset >= quick_check->characters()) return false; | 2318 if (offset >= quick_check->characters()) return false; |
2410 return quick_check->positions(offset)->determines_perfectly; | 2319 return quick_check->positions(offset)->determines_perfectly; |
2411 } | 2320 } |
2412 | 2321 |
2413 | |
2414 static void UpdateBoundsCheck(intptr_t index, intptr_t* checked_up_to) { | 2322 static void UpdateBoundsCheck(intptr_t index, intptr_t* checked_up_to) { |
2415 if (index > *checked_up_to) { | 2323 if (index > *checked_up_to) { |
2416 *checked_up_to = index; | 2324 *checked_up_to = index; |
2417 } | 2325 } |
2418 } | 2326 } |
2419 | 2327 |
2420 | |
2421 // We call this repeatedly to generate code for each pass over the text node. | 2328 // We call this repeatedly to generate code for each pass over the text node. |
2422 // The passes are in increasing order of difficulty because we hope one | 2329 // The passes are in increasing order of difficulty because we hope one |
2423 // of the first passes will fail in which case we are saved the work of the | 2330 // of the first passes will fail in which case we are saved the work of the |
2424 // later passes. for example for the case independent regexp /%[asdfghjkl]a/ | 2331 // later passes. for example for the case independent regexp /%[asdfghjkl]a/ |
2425 // we will check the '%' in the first pass, the case independent 'a' in the | 2332 // we will check the '%' in the first pass, the case independent 'a' in the |
2426 // second pass and the character class in the last pass. | 2333 // second pass and the character class in the last pass. |
2427 // | 2334 // |
2428 // The passes are done from right to left, so for example to test for /bar/ | 2335 // The passes are done from right to left, so for example to test for /bar/ |
2429 // we will first test for an 'r' with offset 2, then an 'a' with offset 1 | 2336 // we will first test for an 'r' with offset 2, then an 'a' with offset 1 |
2430 // and then a 'b' with offset 0. This means we can avoid the end-of-input | 2337 // and then a 'b' with offset 0. This means we can avoid the end-of-input |
(...skipping 70 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2501 if (DeterminedAlready(quick_check, elm.cp_offset())) continue; | 2408 if (DeterminedAlready(quick_check, elm.cp_offset())) continue; |
2502 RegExpCharacterClass* cc = elm.char_class(); | 2409 RegExpCharacterClass* cc = elm.char_class(); |
2503 EmitCharClass(assembler, cc, one_byte, backtrack, cp_offset, | 2410 EmitCharClass(assembler, cc, one_byte, backtrack, cp_offset, |
2504 *checked_up_to < cp_offset, preloaded, Z); | 2411 *checked_up_to < cp_offset, preloaded, Z); |
2505 UpdateBoundsCheck(cp_offset, checked_up_to); | 2412 UpdateBoundsCheck(cp_offset, checked_up_to); |
2506 } | 2413 } |
2507 } | 2414 } |
2508 } | 2415 } |
2509 } | 2416 } |
2510 | 2417 |
2511 | |
2512 intptr_t TextNode::Length() { | 2418 intptr_t TextNode::Length() { |
2513 TextElement elm = elms_->Last(); | 2419 TextElement elm = elms_->Last(); |
2514 ASSERT(elm.cp_offset() >= 0); | 2420 ASSERT(elm.cp_offset() >= 0); |
2515 return elm.cp_offset() + elm.length(); | 2421 return elm.cp_offset() + elm.length(); |
2516 } | 2422 } |
2517 | 2423 |
2518 | |
2519 bool TextNode::SkipPass(intptr_t intptr_t_pass, bool ignore_case) { | 2424 bool TextNode::SkipPass(intptr_t intptr_t_pass, bool ignore_case) { |
2520 TextEmitPassType pass = static_cast<TextEmitPassType>(intptr_t_pass); | 2425 TextEmitPassType pass = static_cast<TextEmitPassType>(intptr_t_pass); |
2521 if (ignore_case) { | 2426 if (ignore_case) { |
2522 return pass == SIMPLE_CHARACTER_MATCH; | 2427 return pass == SIMPLE_CHARACTER_MATCH; |
2523 } else { | 2428 } else { |
2524 return pass == NON_LETTER_CHARACTER_MATCH || pass == CASE_CHARACTER_MATCH; | 2429 return pass == NON_LETTER_CHARACTER_MATCH || pass == CASE_CHARACTER_MATCH; |
2525 } | 2430 } |
2526 } | 2431 } |
2527 | 2432 |
2528 | |
2529 // This generates the code to match a text node. A text node can contain | 2433 // This generates the code to match a text node. A text node can contain |
2530 // straight character sequences (possibly to be matched in a case-independent | 2434 // straight character sequences (possibly to be matched in a case-independent |
2531 // way) and character classes. For efficiency we do not do this in a single | 2435 // way) and character classes. For efficiency we do not do this in a single |
2532 // pass from left to right. Instead we pass over the text node several times, | 2436 // pass from left to right. Instead we pass over the text node several times, |
2533 // emitting code for some character positions every time. See the comment on | 2437 // emitting code for some character positions every time. See the comment on |
2534 // TextEmitPass for details. | 2438 // TextEmitPass for details. |
2535 void TextNode::Emit(RegExpCompiler* compiler, Trace* trace) { | 2439 void TextNode::Emit(RegExpCompiler* compiler, Trace* trace) { |
2536 LimitResult limit_result = LimitVersions(compiler, trace); | 2440 LimitResult limit_result = LimitVersions(compiler, trace); |
2537 if (limit_result == DONE) return; | 2441 if (limit_result == DONE) return; |
2538 ASSERT(limit_result == CONTINUE); | 2442 ASSERT(limit_result == CONTINUE); |
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2570 } | 2474 } |
2571 } | 2475 } |
2572 | 2476 |
2573 Trace successor_trace(*trace); | 2477 Trace successor_trace(*trace); |
2574 successor_trace.set_at_start(false); | 2478 successor_trace.set_at_start(false); |
2575 successor_trace.AdvanceCurrentPositionInTrace(Length(), compiler); | 2479 successor_trace.AdvanceCurrentPositionInTrace(Length(), compiler); |
2576 RecursionCheck rc(compiler); | 2480 RecursionCheck rc(compiler); |
2577 on_success()->Emit(compiler, &successor_trace); | 2481 on_success()->Emit(compiler, &successor_trace); |
2578 } | 2482 } |
2579 | 2483 |
2580 | |
2581 void Trace::InvalidateCurrentCharacter() { | 2484 void Trace::InvalidateCurrentCharacter() { |
2582 characters_preloaded_ = 0; | 2485 characters_preloaded_ = 0; |
2583 } | 2486 } |
2584 | 2487 |
2585 | |
2586 void Trace::AdvanceCurrentPositionInTrace(intptr_t by, | 2488 void Trace::AdvanceCurrentPositionInTrace(intptr_t by, |
2587 RegExpCompiler* compiler) { | 2489 RegExpCompiler* compiler) { |
2588 ASSERT(by > 0); | 2490 ASSERT(by > 0); |
2589 // We don't have an instruction for shifting the current character register | 2491 // We don't have an instruction for shifting the current character register |
2590 // down or for using a shifted value for anything so lets just forget that | 2492 // down or for using a shifted value for anything so lets just forget that |
2591 // we preloaded any characters into it. | 2493 // we preloaded any characters into it. |
2592 characters_preloaded_ = 0; | 2494 characters_preloaded_ = 0; |
2593 // Adjust the offsets of the quick check performed information. This | 2495 // Adjust the offsets of the quick check performed information. This |
2594 // information is used to find out what we already determined about the | 2496 // information is used to find out what we already determined about the |
2595 // characters by means of mask and compare. | 2497 // characters by means of mask and compare. |
2596 quick_check_performed_.Advance(by, compiler->one_byte()); | 2498 quick_check_performed_.Advance(by, compiler->one_byte()); |
2597 cp_offset_ += by; | 2499 cp_offset_ += by; |
2598 if (cp_offset_ > RegExpMacroAssembler::kMaxCPOffset) { | 2500 if (cp_offset_ > RegExpMacroAssembler::kMaxCPOffset) { |
2599 compiler->SetRegExpTooBig(); | 2501 compiler->SetRegExpTooBig(); |
2600 cp_offset_ = 0; | 2502 cp_offset_ = 0; |
2601 } | 2503 } |
2602 bound_checked_up_to_ = | 2504 bound_checked_up_to_ = |
2603 Utils::Maximum(static_cast<intptr_t>(0), bound_checked_up_to_ - by); | 2505 Utils::Maximum(static_cast<intptr_t>(0), bound_checked_up_to_ - by); |
2604 } | 2506 } |
2605 | 2507 |
2606 | |
2607 void TextNode::MakeCaseIndependent(bool is_one_byte) { | 2508 void TextNode::MakeCaseIndependent(bool is_one_byte) { |
2608 intptr_t element_count = elms_->length(); | 2509 intptr_t element_count = elms_->length(); |
2609 for (intptr_t i = 0; i < element_count; i++) { | 2510 for (intptr_t i = 0; i < element_count; i++) { |
2610 TextElement elm = elms_->At(i); | 2511 TextElement elm = elms_->At(i); |
2611 if (elm.text_type() == TextElement::CHAR_CLASS) { | 2512 if (elm.text_type() == TextElement::CHAR_CLASS) { |
2612 RegExpCharacterClass* cc = elm.char_class(); | 2513 RegExpCharacterClass* cc = elm.char_class(); |
2613 // None of the standard character classes is different in the case | 2514 // None of the standard character classes is different in the case |
2614 // independent case and it slows us down if we don't know that. | 2515 // independent case and it slows us down if we don't know that. |
2615 if (cc->is_standard()) continue; | 2516 if (cc->is_standard()) continue; |
2616 ZoneGrowableArray<CharacterRange>* ranges = cc->ranges(); | 2517 ZoneGrowableArray<CharacterRange>* ranges = cc->ranges(); |
2617 intptr_t range_count = ranges->length(); | 2518 intptr_t range_count = ranges->length(); |
2618 for (intptr_t j = 0; j < range_count; j++) { | 2519 for (intptr_t j = 0; j < range_count; j++) { |
2619 (*ranges)[j].AddCaseEquivalents(ranges, is_one_byte, Z); | 2520 (*ranges)[j].AddCaseEquivalents(ranges, is_one_byte, Z); |
2620 } | 2521 } |
2621 } | 2522 } |
2622 } | 2523 } |
2623 } | 2524 } |
2624 | 2525 |
2625 | |
2626 intptr_t TextNode::GreedyLoopTextLength() { | 2526 intptr_t TextNode::GreedyLoopTextLength() { |
2627 TextElement elm = elms_->At(elms_->length() - 1); | 2527 TextElement elm = elms_->At(elms_->length() - 1); |
2628 return elm.cp_offset() + elm.length(); | 2528 return elm.cp_offset() + elm.length(); |
2629 } | 2529 } |
2630 | 2530 |
2631 | |
2632 RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode( | 2531 RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode( |
2633 RegExpCompiler* compiler) { | 2532 RegExpCompiler* compiler) { |
2634 if (elms_->length() != 1) return NULL; | 2533 if (elms_->length() != 1) return NULL; |
2635 TextElement elm = elms_->At(0); | 2534 TextElement elm = elms_->At(0); |
2636 if (elm.text_type() != TextElement::CHAR_CLASS) return NULL; | 2535 if (elm.text_type() != TextElement::CHAR_CLASS) return NULL; |
2637 RegExpCharacterClass* node = elm.char_class(); | 2536 RegExpCharacterClass* node = elm.char_class(); |
2638 ZoneGrowableArray<CharacterRange>* ranges = node->ranges(); | 2537 ZoneGrowableArray<CharacterRange>* ranges = node->ranges(); |
2639 if (!CharacterRange::IsCanonical(ranges)) { | 2538 if (!CharacterRange::IsCanonical(ranges)) { |
2640 CharacterRange::Canonicalize(ranges); | 2539 CharacterRange::Canonicalize(ranges); |
2641 } | 2540 } |
2642 if (node->is_negated()) { | 2541 if (node->is_negated()) { |
2643 return ranges->length() == 0 ? on_success() : NULL; | 2542 return ranges->length() == 0 ? on_success() : NULL; |
2644 } | 2543 } |
2645 if (ranges->length() != 1) return NULL; | 2544 if (ranges->length() != 1) return NULL; |
2646 uint32_t max_char; | 2545 uint32_t max_char; |
2647 if (compiler->one_byte()) { | 2546 if (compiler->one_byte()) { |
2648 max_char = Symbols::kMaxOneCharCodeSymbol; | 2547 max_char = Symbols::kMaxOneCharCodeSymbol; |
2649 } else { | 2548 } else { |
2650 max_char = Utf16::kMaxCodeUnit; | 2549 max_char = Utf16::kMaxCodeUnit; |
2651 } | 2550 } |
2652 return ranges->At(0).IsEverything(max_char) ? on_success() : NULL; | 2551 return ranges->At(0).IsEverything(max_char) ? on_success() : NULL; |
2653 } | 2552 } |
2654 | 2553 |
2655 | |
2656 // Finds the fixed match length of a sequence of nodes that goes from | 2554 // Finds the fixed match length of a sequence of nodes that goes from |
2657 // this alternative and back to this choice node. If there are variable | 2555 // this alternative and back to this choice node. If there are variable |
2658 // length nodes or other complications in the way then return a sentinel | 2556 // length nodes or other complications in the way then return a sentinel |
2659 // value indicating that a greedy loop cannot be constructed. | 2557 // value indicating that a greedy loop cannot be constructed. |
2660 intptr_t ChoiceNode::GreedyLoopTextLengthForAlternative( | 2558 intptr_t ChoiceNode::GreedyLoopTextLengthForAlternative( |
2661 GuardedAlternative* alternative) { | 2559 GuardedAlternative* alternative) { |
2662 intptr_t length = 0; | 2560 intptr_t length = 0; |
2663 RegExpNode* node = alternative->node(); | 2561 RegExpNode* node = alternative->node(); |
2664 // Later we will generate code for all these text nodes using recursion | 2562 // Later we will generate code for all these text nodes using recursion |
2665 // so we have to limit the max number. | 2563 // so we have to limit the max number. |
2666 intptr_t recursion_depth = 0; | 2564 intptr_t recursion_depth = 0; |
2667 while (node != this) { | 2565 while (node != this) { |
2668 if (recursion_depth++ > RegExpCompiler::kMaxRecursion) { | 2566 if (recursion_depth++ > RegExpCompiler::kMaxRecursion) { |
2669 return kNodeIsTooComplexForGreedyLoops; | 2567 return kNodeIsTooComplexForGreedyLoops; |
2670 } | 2568 } |
2671 intptr_t node_length = node->GreedyLoopTextLength(); | 2569 intptr_t node_length = node->GreedyLoopTextLength(); |
2672 if (node_length == kNodeIsTooComplexForGreedyLoops) { | 2570 if (node_length == kNodeIsTooComplexForGreedyLoops) { |
2673 return kNodeIsTooComplexForGreedyLoops; | 2571 return kNodeIsTooComplexForGreedyLoops; |
2674 } | 2572 } |
2675 length += node_length; | 2573 length += node_length; |
2676 SeqRegExpNode* seq_node = static_cast<SeqRegExpNode*>(node); | 2574 SeqRegExpNode* seq_node = static_cast<SeqRegExpNode*>(node); |
2677 node = seq_node->on_success(); | 2575 node = seq_node->on_success(); |
2678 } | 2576 } |
2679 return length; | 2577 return length; |
2680 } | 2578 } |
2681 | 2579 |
2682 | |
2683 void LoopChoiceNode::AddLoopAlternative(GuardedAlternative alt) { | 2580 void LoopChoiceNode::AddLoopAlternative(GuardedAlternative alt) { |
2684 ASSERT(loop_node_ == NULL); | 2581 ASSERT(loop_node_ == NULL); |
2685 AddAlternative(alt); | 2582 AddAlternative(alt); |
2686 loop_node_ = alt.node(); | 2583 loop_node_ = alt.node(); |
2687 } | 2584 } |
2688 | 2585 |
2689 | |
2690 void LoopChoiceNode::AddContinueAlternative(GuardedAlternative alt) { | 2586 void LoopChoiceNode::AddContinueAlternative(GuardedAlternative alt) { |
2691 ASSERT(continue_node_ == NULL); | 2587 ASSERT(continue_node_ == NULL); |
2692 AddAlternative(alt); | 2588 AddAlternative(alt); |
2693 continue_node_ = alt.node(); | 2589 continue_node_ = alt.node(); |
2694 } | 2590 } |
2695 | 2591 |
2696 | |
2697 void LoopChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) { | 2592 void LoopChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) { |
2698 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); | 2593 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); |
2699 if (trace->stop_node() == this) { | 2594 if (trace->stop_node() == this) { |
2700 // Back edge of greedy optimized loop node graph. | 2595 // Back edge of greedy optimized loop node graph. |
2701 intptr_t text_length = | 2596 intptr_t text_length = |
2702 GreedyLoopTextLengthForAlternative(&((*alternatives_)[0])); | 2597 GreedyLoopTextLengthForAlternative(&((*alternatives_)[0])); |
2703 ASSERT(text_length != kNodeIsTooComplexForGreedyLoops); | 2598 ASSERT(text_length != kNodeIsTooComplexForGreedyLoops); |
2704 // Update the counter-based backtracking info on the stack. This is an | 2599 // Update the counter-based backtracking info on the stack. This is an |
2705 // optimization for greedy loops (see below). | 2600 // optimization for greedy loops (see below). |
2706 ASSERT(trace->cp_offset() == text_length); | 2601 ASSERT(trace->cp_offset() == text_length); |
2707 macro_assembler->AdvanceCurrentPosition(text_length); | 2602 macro_assembler->AdvanceCurrentPosition(text_length); |
2708 macro_assembler->GoTo(trace->loop_label()); | 2603 macro_assembler->GoTo(trace->loop_label()); |
2709 return; | 2604 return; |
2710 } | 2605 } |
2711 ASSERT(trace->stop_node() == NULL); | 2606 ASSERT(trace->stop_node() == NULL); |
2712 if (!trace->is_trivial()) { | 2607 if (!trace->is_trivial()) { |
2713 trace->Flush(compiler, this); | 2608 trace->Flush(compiler, this); |
2714 return; | 2609 return; |
2715 } | 2610 } |
2716 ChoiceNode::Emit(compiler, trace); | 2611 ChoiceNode::Emit(compiler, trace); |
2717 } | 2612 } |
2718 | 2613 |
2719 | |
2720 intptr_t ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler, | 2614 intptr_t ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler, |
2721 intptr_t eats_at_least) { | 2615 intptr_t eats_at_least) { |
2722 intptr_t preload_characters = | 2616 intptr_t preload_characters = |
2723 Utils::Minimum(static_cast<intptr_t>(4), eats_at_least); | 2617 Utils::Minimum(static_cast<intptr_t>(4), eats_at_least); |
2724 if (compiler->macro_assembler()->CanReadUnaligned()) { | 2618 if (compiler->macro_assembler()->CanReadUnaligned()) { |
2725 bool one_byte = compiler->one_byte(); | 2619 bool one_byte = compiler->one_byte(); |
2726 if (one_byte) { | 2620 if (one_byte) { |
2727 if (preload_characters > 4) preload_characters = 4; | 2621 if (preload_characters > 4) preload_characters = 4; |
2728 // We can't preload 3 characters because there is no machine instruction | 2622 // We can't preload 3 characters because there is no machine instruction |
2729 // to do that. We can't just load 4 because we could be reading | 2623 // to do that. We can't just load 4 because we could be reading |
2730 // beyond the end of the string, which could cause a memory fault. | 2624 // beyond the end of the string, which could cause a memory fault. |
2731 if (preload_characters == 3) preload_characters = 2; | 2625 if (preload_characters == 3) preload_characters = 2; |
2732 } else { | 2626 } else { |
2733 if (preload_characters > 2) preload_characters = 2; | 2627 if (preload_characters > 2) preload_characters = 2; |
2734 } | 2628 } |
2735 } else { | 2629 } else { |
2736 if (preload_characters > 1) preload_characters = 1; | 2630 if (preload_characters > 1) preload_characters = 1; |
2737 } | 2631 } |
2738 return preload_characters; | 2632 return preload_characters; |
2739 } | 2633 } |
2740 | 2634 |
2741 | |
2742 // This structure is used when generating the alternatives in a choice node. It | 2635 // This structure is used when generating the alternatives in a choice node. It |
2743 // records the way the alternative is being code generated. | 2636 // records the way the alternative is being code generated. |
2744 struct AlternativeGeneration { | 2637 struct AlternativeGeneration { |
2745 AlternativeGeneration() | 2638 AlternativeGeneration() |
2746 : possible_success(), | 2639 : possible_success(), |
2747 expects_preload(false), | 2640 expects_preload(false), |
2748 after(), | 2641 after(), |
2749 quick_check_details() {} | 2642 quick_check_details() {} |
2750 BlockLabel possible_success; | 2643 BlockLabel possible_success; |
2751 bool expects_preload; | 2644 bool expects_preload; |
2752 BlockLabel after; | 2645 BlockLabel after; |
2753 QuickCheckDetails quick_check_details; | 2646 QuickCheckDetails quick_check_details; |
2754 }; | 2647 }; |
2755 | 2648 |
2756 | |
2757 // Creates a list of AlternativeGenerations. If the list has a reasonable | 2649 // Creates a list of AlternativeGenerations. If the list has a reasonable |
2758 // size then it is on the stack, otherwise the excess is on the heap. | 2650 // size then it is on the stack, otherwise the excess is on the heap. |
2759 class AlternativeGenerationList { | 2651 class AlternativeGenerationList { |
2760 public: | 2652 public: |
2761 explicit AlternativeGenerationList(intptr_t count) : alt_gens_(count) { | 2653 explicit AlternativeGenerationList(intptr_t count) : alt_gens_(count) { |
2762 for (intptr_t i = 0; i < count && i < kAFew; i++) { | 2654 for (intptr_t i = 0; i < count && i < kAFew; i++) { |
2763 alt_gens_.Add(a_few_alt_gens_ + i); | 2655 alt_gens_.Add(a_few_alt_gens_ + i); |
2764 } | 2656 } |
2765 for (intptr_t i = kAFew; i < count; i++) { | 2657 for (intptr_t i = kAFew; i < count; i++) { |
2766 alt_gens_.Add(new AlternativeGeneration()); | 2658 alt_gens_.Add(new AlternativeGeneration()); |
2767 } | 2659 } |
2768 } | 2660 } |
2769 ~AlternativeGenerationList() { | 2661 ~AlternativeGenerationList() { |
2770 for (intptr_t i = kAFew; i < alt_gens_.length(); i++) { | 2662 for (intptr_t i = kAFew; i < alt_gens_.length(); i++) { |
2771 delete alt_gens_[i]; | 2663 delete alt_gens_[i]; |
2772 alt_gens_[i] = NULL; | 2664 alt_gens_[i] = NULL; |
2773 } | 2665 } |
2774 } | 2666 } |
2775 | 2667 |
2776 AlternativeGeneration* at(intptr_t i) { return alt_gens_[i]; } | 2668 AlternativeGeneration* at(intptr_t i) { return alt_gens_[i]; } |
2777 | 2669 |
2778 private: | 2670 private: |
2779 static const intptr_t kAFew = 10; | 2671 static const intptr_t kAFew = 10; |
2780 GrowableArray<AlternativeGeneration*> alt_gens_; | 2672 GrowableArray<AlternativeGeneration*> alt_gens_; |
2781 AlternativeGeneration a_few_alt_gens_[kAFew]; | 2673 AlternativeGeneration a_few_alt_gens_[kAFew]; |
2782 | 2674 |
2783 DISALLOW_ALLOCATION(); | 2675 DISALLOW_ALLOCATION(); |
2784 }; | 2676 }; |
2785 | 2677 |
2786 | |
2787 // The '2' variant is inclusive from and exclusive to. | 2678 // The '2' variant is inclusive from and exclusive to. |
2788 // This covers \s as defined in ECMA-262 5.1, 15.10.2.12, | 2679 // This covers \s as defined in ECMA-262 5.1, 15.10.2.12, |
2789 // which include WhiteSpace (7.2) or LineTerminator (7.3) values. | 2680 // which include WhiteSpace (7.2) or LineTerminator (7.3) values. |
2790 static const intptr_t kSpaceRanges[] = { | 2681 static const intptr_t kSpaceRanges[] = { |
2791 '\t', '\r' + 1, ' ', ' ' + 1, 0x00A0, 0x00A1, 0x1680, 0x1681, | 2682 '\t', '\r' + 1, ' ', ' ' + 1, 0x00A0, 0x00A1, 0x1680, 0x1681, |
2792 0x180E, 0x180F, 0x2000, 0x200B, 0x2028, 0x202A, 0x202F, 0x2030, | 2683 0x180E, 0x180F, 0x2000, 0x200B, 0x2028, 0x202A, 0x202F, 0x2030, |
2793 0x205F, 0x2060, 0x3000, 0x3001, 0xFEFF, 0xFF00, 0x10000}; | 2684 0x205F, 0x2060, 0x3000, 0x3001, 0xFEFF, 0xFF00, 0x10000}; |
2794 static const intptr_t kSpaceRangeCount = ARRAY_SIZE(kSpaceRanges); | 2685 static const intptr_t kSpaceRangeCount = ARRAY_SIZE(kSpaceRanges); |
2795 static const intptr_t kWordRanges[] = {'0', '9' + 1, 'A', 'Z' + 1, '_', | 2686 static const intptr_t kWordRanges[] = {'0', '9' + 1, 'A', 'Z' + 1, '_', |
2796 '_' + 1, 'a', 'z' + 1, 0x10000}; | 2687 '_' + 1, 'a', 'z' + 1, 0x10000}; |
2797 static const intptr_t kWordRangeCount = ARRAY_SIZE(kWordRanges); | 2688 static const intptr_t kWordRangeCount = ARRAY_SIZE(kWordRanges); |
2798 static const intptr_t kDigitRanges[] = {'0', '9' + 1, 0x10000}; | 2689 static const intptr_t kDigitRanges[] = {'0', '9' + 1, 0x10000}; |
2799 static const intptr_t kDigitRangeCount = ARRAY_SIZE(kDigitRanges); | 2690 static const intptr_t kDigitRangeCount = ARRAY_SIZE(kDigitRanges); |
2800 static const intptr_t kSurrogateRanges[] = {0xd800, 0xe000, 0x10000}; | 2691 static const intptr_t kSurrogateRanges[] = {0xd800, 0xe000, 0x10000}; |
2801 static const intptr_t kSurrogateRangeCount = ARRAY_SIZE(kSurrogateRanges); | 2692 static const intptr_t kSurrogateRangeCount = ARRAY_SIZE(kSurrogateRanges); |
2802 static const intptr_t kLineTerminatorRanges[] = {0x000A, 0x000B, 0x000D, 0x000E, | 2693 static const intptr_t kLineTerminatorRanges[] = {0x000A, 0x000B, 0x000D, 0x000E, |
2803 0x2028, 0x202A, 0x10000}; | 2694 0x2028, 0x202A, 0x10000}; |
2804 static const intptr_t kLineTerminatorRangeCount = | 2695 static const intptr_t kLineTerminatorRangeCount = |
2805 ARRAY_SIZE(kLineTerminatorRanges); | 2696 ARRAY_SIZE(kLineTerminatorRanges); |
2806 | 2697 |
2807 | |
2808 void BoyerMoorePositionInfo::Set(intptr_t character) { | 2698 void BoyerMoorePositionInfo::Set(intptr_t character) { |
2809 SetInterval(Interval(character, character)); | 2699 SetInterval(Interval(character, character)); |
2810 } | 2700 } |
2811 | 2701 |
2812 | |
2813 void BoyerMoorePositionInfo::SetInterval(const Interval& interval) { | 2702 void BoyerMoorePositionInfo::SetInterval(const Interval& interval) { |
2814 s_ = AddRange(s_, kSpaceRanges, kSpaceRangeCount, interval); | 2703 s_ = AddRange(s_, kSpaceRanges, kSpaceRangeCount, interval); |
2815 w_ = AddRange(w_, kWordRanges, kWordRangeCount, interval); | 2704 w_ = AddRange(w_, kWordRanges, kWordRangeCount, interval); |
2816 d_ = AddRange(d_, kDigitRanges, kDigitRangeCount, interval); | 2705 d_ = AddRange(d_, kDigitRanges, kDigitRangeCount, interval); |
2817 surrogate_ = | 2706 surrogate_ = |
2818 AddRange(surrogate_, kSurrogateRanges, kSurrogateRangeCount, interval); | 2707 AddRange(surrogate_, kSurrogateRanges, kSurrogateRangeCount, interval); |
2819 if (interval.to() - interval.from() >= kMapSize - 1) { | 2708 if (interval.to() - interval.from() >= kMapSize - 1) { |
2820 if (map_count_ != kMapSize) { | 2709 if (map_count_ != kMapSize) { |
2821 map_count_ = kMapSize; | 2710 map_count_ = kMapSize; |
2822 for (intptr_t i = 0; i < kMapSize; i++) | 2711 for (intptr_t i = 0; i < kMapSize; i++) |
2823 (*map_)[i] = true; | 2712 (*map_)[i] = true; |
2824 } | 2713 } |
2825 return; | 2714 return; |
2826 } | 2715 } |
2827 for (intptr_t i = interval.from(); i <= interval.to(); i++) { | 2716 for (intptr_t i = interval.from(); i <= interval.to(); i++) { |
2828 intptr_t mod_character = (i & kMask); | 2717 intptr_t mod_character = (i & kMask); |
2829 if (!map_->At(mod_character)) { | 2718 if (!map_->At(mod_character)) { |
2830 map_count_++; | 2719 map_count_++; |
2831 (*map_)[mod_character] = true; | 2720 (*map_)[mod_character] = true; |
2832 } | 2721 } |
2833 if (map_count_ == kMapSize) return; | 2722 if (map_count_ == kMapSize) return; |
2834 } | 2723 } |
2835 } | 2724 } |
2836 | 2725 |
2837 | |
2838 void BoyerMoorePositionInfo::SetAll() { | 2726 void BoyerMoorePositionInfo::SetAll() { |
2839 s_ = w_ = d_ = kLatticeUnknown; | 2727 s_ = w_ = d_ = kLatticeUnknown; |
2840 if (map_count_ != kMapSize) { | 2728 if (map_count_ != kMapSize) { |
2841 map_count_ = kMapSize; | 2729 map_count_ = kMapSize; |
2842 for (intptr_t i = 0; i < kMapSize; i++) | 2730 for (intptr_t i = 0; i < kMapSize; i++) |
2843 (*map_)[i] = true; | 2731 (*map_)[i] = true; |
2844 } | 2732 } |
2845 } | 2733 } |
2846 | 2734 |
2847 | |
2848 BoyerMooreLookahead::BoyerMooreLookahead(intptr_t length, | 2735 BoyerMooreLookahead::BoyerMooreLookahead(intptr_t length, |
2849 RegExpCompiler* compiler, | 2736 RegExpCompiler* compiler, |
2850 Zone* zone) | 2737 Zone* zone) |
2851 : length_(length), compiler_(compiler) { | 2738 : length_(length), compiler_(compiler) { |
2852 if (compiler->one_byte()) { | 2739 if (compiler->one_byte()) { |
2853 max_char_ = Symbols::kMaxOneCharCodeSymbol; | 2740 max_char_ = Symbols::kMaxOneCharCodeSymbol; |
2854 } else { | 2741 } else { |
2855 max_char_ = Utf16::kMaxCodeUnit; | 2742 max_char_ = Utf16::kMaxCodeUnit; |
2856 } | 2743 } |
2857 bitmaps_ = new (zone) ZoneGrowableArray<BoyerMoorePositionInfo*>(length); | 2744 bitmaps_ = new (zone) ZoneGrowableArray<BoyerMoorePositionInfo*>(length); |
2858 for (intptr_t i = 0; i < length; i++) { | 2745 for (intptr_t i = 0; i < length; i++) { |
2859 bitmaps_->Add(new (zone) BoyerMoorePositionInfo(zone)); | 2746 bitmaps_->Add(new (zone) BoyerMoorePositionInfo(zone)); |
2860 } | 2747 } |
2861 } | 2748 } |
2862 | 2749 |
2863 | |
2864 // Find the longest range of lookahead that has the fewest number of different | 2750 // Find the longest range of lookahead that has the fewest number of different |
2865 // characters that can occur at a given position. Since we are optimizing two | 2751 // characters that can occur at a given position. Since we are optimizing two |
2866 // different parameters at once this is a tradeoff. | 2752 // different parameters at once this is a tradeoff. |
2867 bool BoyerMooreLookahead::FindWorthwhileInterval(intptr_t* from, intptr_t* to) { | 2753 bool BoyerMooreLookahead::FindWorthwhileInterval(intptr_t* from, intptr_t* to) { |
2868 intptr_t biggest_points = 0; | 2754 intptr_t biggest_points = 0; |
2869 // If more than 32 characters out of 128 can occur it is unlikely that we can | 2755 // If more than 32 characters out of 128 can occur it is unlikely that we can |
2870 // be lucky enough to step forwards much of the time. | 2756 // be lucky enough to step forwards much of the time. |
2871 const intptr_t kMaxMax = 32; | 2757 const intptr_t kMaxMax = 32; |
2872 for (intptr_t max_number_of_chars = 4; max_number_of_chars < kMaxMax; | 2758 for (intptr_t max_number_of_chars = 4; max_number_of_chars < kMaxMax; |
2873 max_number_of_chars *= 2) { | 2759 max_number_of_chars *= 2) { |
2874 biggest_points = | 2760 biggest_points = |
2875 FindBestInterval(max_number_of_chars, biggest_points, from, to); | 2761 FindBestInterval(max_number_of_chars, biggest_points, from, to); |
2876 } | 2762 } |
2877 if (biggest_points == 0) return false; | 2763 if (biggest_points == 0) return false; |
2878 return true; | 2764 return true; |
2879 } | 2765 } |
2880 | 2766 |
2881 | |
2882 // Find the highest-points range between 0 and length_ where the character | 2767 // Find the highest-points range between 0 and length_ where the character |
2883 // information is not too vague. 'Too vague' means that there are more than | 2768 // information is not too vague. 'Too vague' means that there are more than |
2884 // max_number_of_chars that can occur at this position. Calculates the number | 2769 // max_number_of_chars that can occur at this position. Calculates the number |
2885 // of points as the product of width-of-the-range and | 2770 // of points as the product of width-of-the-range and |
2886 // probability-of-finding-one-of-the-characters, where the probability is | 2771 // probability-of-finding-one-of-the-characters, where the probability is |
2887 // calculated using the frequency distribution of the sample subject string. | 2772 // calculated using the frequency distribution of the sample subject string. |
2888 intptr_t BoyerMooreLookahead::FindBestInterval(intptr_t max_number_of_chars, | 2773 intptr_t BoyerMooreLookahead::FindBestInterval(intptr_t max_number_of_chars, |
2889 intptr_t old_biggest_points, | 2774 intptr_t old_biggest_points, |
2890 intptr_t* from, | 2775 intptr_t* from, |
2891 intptr_t* to) { | 2776 intptr_t* to) { |
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2931 intptr_t points = (i - remembered_from) * probability; | 2816 intptr_t points = (i - remembered_from) * probability; |
2932 if (points > biggest_points) { | 2817 if (points > biggest_points) { |
2933 *from = remembered_from; | 2818 *from = remembered_from; |
2934 *to = i - 1; | 2819 *to = i - 1; |
2935 biggest_points = points; | 2820 biggest_points = points; |
2936 } | 2821 } |
2937 } | 2822 } |
2938 return biggest_points; | 2823 return biggest_points; |
2939 } | 2824 } |
2940 | 2825 |
2941 | |
2942 // Take all the characters that will not prevent a successful match if they | 2826 // Take all the characters that will not prevent a successful match if they |
2943 // occur in the subject string in the range between min_lookahead and | 2827 // occur in the subject string in the range between min_lookahead and |
2944 // max_lookahead (inclusive) measured from the current position. If the | 2828 // max_lookahead (inclusive) measured from the current position. If the |
2945 // character at max_lookahead offset is not one of these characters, then we | 2829 // character at max_lookahead offset is not one of these characters, then we |
2946 // can safely skip forwards by the number of characters in the range. | 2830 // can safely skip forwards by the number of characters in the range. |
2947 intptr_t BoyerMooreLookahead::GetSkipTable( | 2831 intptr_t BoyerMooreLookahead::GetSkipTable( |
2948 intptr_t min_lookahead, | 2832 intptr_t min_lookahead, |
2949 intptr_t max_lookahead, | 2833 intptr_t max_lookahead, |
2950 const TypedData& boolean_skip_table) { | 2834 const TypedData& boolean_skip_table) { |
2951 const intptr_t kSize = RegExpMacroAssembler::kTableSize; | 2835 const intptr_t kSize = RegExpMacroAssembler::kTableSize; |
(...skipping 11 matching lines...) Expand all Loading... |
2963 for (intptr_t j = 0; j < kSize; j++) { | 2847 for (intptr_t j = 0; j < kSize; j++) { |
2964 if (map->at(j)) { | 2848 if (map->at(j)) { |
2965 boolean_skip_table.SetUint8(j, kDontSkipArrayEntry); | 2849 boolean_skip_table.SetUint8(j, kDontSkipArrayEntry); |
2966 } | 2850 } |
2967 } | 2851 } |
2968 } | 2852 } |
2969 | 2853 |
2970 return skip; | 2854 return skip; |
2971 } | 2855 } |
2972 | 2856 |
2973 | |
2974 // See comment above on the implementation of GetSkipTable. | 2857 // See comment above on the implementation of GetSkipTable. |
2975 void BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) { | 2858 void BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) { |
2976 const intptr_t kSize = RegExpMacroAssembler::kTableSize; | 2859 const intptr_t kSize = RegExpMacroAssembler::kTableSize; |
2977 | 2860 |
2978 intptr_t min_lookahead = 0; | 2861 intptr_t min_lookahead = 0; |
2979 intptr_t max_lookahead = 0; | 2862 intptr_t max_lookahead = 0; |
2980 | 2863 |
2981 if (!FindWorthwhileInterval(&min_lookahead, &max_lookahead)) return; | 2864 if (!FindWorthwhileInterval(&min_lookahead, &max_lookahead)) return; |
2982 | 2865 |
2983 bool found_single_character = false; | 2866 bool found_single_character = false; |
(...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3034 masm->CheckPreemption(/*is_backtrack=*/false); | 2917 masm->CheckPreemption(/*is_backtrack=*/false); |
3035 masm->LoadCurrentCharacter(max_lookahead, &cont, true); | 2918 masm->LoadCurrentCharacter(max_lookahead, &cont, true); |
3036 masm->CheckBitInTable(boolean_skip_table, &cont); | 2919 masm->CheckBitInTable(boolean_skip_table, &cont); |
3037 masm->AdvanceCurrentPosition(skip_distance); | 2920 masm->AdvanceCurrentPosition(skip_distance); |
3038 masm->GoTo(&again); | 2921 masm->GoTo(&again); |
3039 masm->BindBlock(&cont); | 2922 masm->BindBlock(&cont); |
3040 | 2923 |
3041 return; | 2924 return; |
3042 } | 2925 } |
3043 | 2926 |
3044 | |
3045 /* Code generation for choice nodes. | 2927 /* Code generation for choice nodes. |
3046 * | 2928 * |
3047 * We generate quick checks that do a mask and compare to eliminate a | 2929 * We generate quick checks that do a mask and compare to eliminate a |
3048 * choice. If the quick check succeeds then it jumps to the continuation to | 2930 * choice. If the quick check succeeds then it jumps to the continuation to |
3049 * do slow checks and check subsequent nodes. If it fails (the common case) | 2931 * do slow checks and check subsequent nodes. If it fails (the common case) |
3050 * it falls through to the next choice. | 2932 * it falls through to the next choice. |
3051 * | 2933 * |
3052 * Here is the desired flow graph. Nodes directly below each other imply | 2934 * Here is the desired flow graph. Nodes directly below each other imply |
3053 * fallthrough. Alternatives 1 and 2 have quick checks. Alternative | 2935 * fallthrough. Alternatives 1 and 2 have quick checks. Alternative |
3054 * 3 doesn't have a quick check so we have to call the slow check. | 2936 * 3 doesn't have a quick check so we have to call the slow check. |
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3114 * S| / | 2996 * S| / |
3115 * V F / | 2997 * V F / |
3116 * S2--/ | 2998 * S2--/ |
3117 */ | 2999 */ |
3118 | 3000 |
3119 GreedyLoopState::GreedyLoopState(bool not_at_start) { | 3001 GreedyLoopState::GreedyLoopState(bool not_at_start) { |
3120 counter_backtrack_trace_.set_backtrack(&label_); | 3002 counter_backtrack_trace_.set_backtrack(&label_); |
3121 if (not_at_start) counter_backtrack_trace_.set_at_start(false); | 3003 if (not_at_start) counter_backtrack_trace_.set_at_start(false); |
3122 } | 3004 } |
3123 | 3005 |
3124 | |
3125 void ChoiceNode::AssertGuardsMentionRegisters(Trace* trace) { | 3006 void ChoiceNode::AssertGuardsMentionRegisters(Trace* trace) { |
3126 #ifdef DEBUG | 3007 #ifdef DEBUG |
3127 intptr_t choice_count = alternatives_->length(); | 3008 intptr_t choice_count = alternatives_->length(); |
3128 for (intptr_t i = 0; i < choice_count - 1; i++) { | 3009 for (intptr_t i = 0; i < choice_count - 1; i++) { |
3129 GuardedAlternative alternative = alternatives_->At(i); | 3010 GuardedAlternative alternative = alternatives_->At(i); |
3130 ZoneGrowableArray<Guard*>* guards = alternative.guards(); | 3011 ZoneGrowableArray<Guard*>* guards = alternative.guards(); |
3131 intptr_t guard_count = (guards == NULL) ? 0 : guards->length(); | 3012 intptr_t guard_count = (guards == NULL) ? 0 : guards->length(); |
3132 for (intptr_t j = 0; j < guard_count; j++) { | 3013 for (intptr_t j = 0; j < guard_count; j++) { |
3133 ASSERT(!trace->mentions_reg(guards->At(j)->reg())); | 3014 ASSERT(!trace->mentions_reg(guards->At(j)->reg())); |
3134 } | 3015 } |
3135 } | 3016 } |
3136 #endif | 3017 #endif |
3137 } | 3018 } |
3138 | 3019 |
3139 | |
3140 void ChoiceNode::SetUpPreLoad(RegExpCompiler* compiler, | 3020 void ChoiceNode::SetUpPreLoad(RegExpCompiler* compiler, |
3141 Trace* current_trace, | 3021 Trace* current_trace, |
3142 PreloadState* state) { | 3022 PreloadState* state) { |
3143 if (state->eats_at_least_ == PreloadState::kEatsAtLeastNotYetInitialized) { | 3023 if (state->eats_at_least_ == PreloadState::kEatsAtLeastNotYetInitialized) { |
3144 // Save some time by looking at most one machine word ahead. | 3024 // Save some time by looking at most one machine word ahead. |
3145 state->eats_at_least_ = | 3025 state->eats_at_least_ = |
3146 EatsAtLeast(compiler->one_byte() ? 4 : 2, kRecursionBudget, | 3026 EatsAtLeast(compiler->one_byte() ? 4 : 2, kRecursionBudget, |
3147 current_trace->at_start() == Trace::FALSE_VALUE); | 3027 current_trace->at_start() == Trace::FALSE_VALUE); |
3148 } | 3028 } |
3149 state->preload_characters_ = | 3029 state->preload_characters_ = |
3150 CalculatePreloadCharacters(compiler, state->eats_at_least_); | 3030 CalculatePreloadCharacters(compiler, state->eats_at_least_); |
3151 | 3031 |
3152 state->preload_is_current_ = | 3032 state->preload_is_current_ = |
3153 (current_trace->characters_preloaded() == state->preload_characters_); | 3033 (current_trace->characters_preloaded() == state->preload_characters_); |
3154 state->preload_has_checked_bounds_ = state->preload_is_current_; | 3034 state->preload_has_checked_bounds_ = state->preload_is_current_; |
3155 } | 3035 } |
3156 | 3036 |
3157 | |
3158 void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) { | 3037 void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) { |
3159 intptr_t choice_count = alternatives_->length(); | 3038 intptr_t choice_count = alternatives_->length(); |
3160 | 3039 |
3161 AssertGuardsMentionRegisters(trace); | 3040 AssertGuardsMentionRegisters(trace); |
3162 | 3041 |
3163 LimitResult limit_result = LimitVersions(compiler, trace); | 3042 LimitResult limit_result = LimitVersions(compiler, trace); |
3164 if (limit_result == DONE) return; | 3043 if (limit_result == DONE) return; |
3165 ASSERT(limit_result == CONTINUE); | 3044 ASSERT(limit_result == CONTINUE); |
3166 | 3045 |
3167 // For loop nodes we already flushed (see LoopChoiceNode::Emit), but for | 3046 // For loop nodes we already flushed (see LoopChoiceNode::Emit), but for |
(...skipping 85 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3253 | 3132 |
3254 macro_assembler->BindBlock(greedy_loop_state->label()); | 3133 macro_assembler->BindBlock(greedy_loop_state->label()); |
3255 // If we have unwound to the bottom then backtrack. | 3134 // If we have unwound to the bottom then backtrack. |
3256 macro_assembler->CheckGreedyLoop(trace->backtrack()); | 3135 macro_assembler->CheckGreedyLoop(trace->backtrack()); |
3257 // Otherwise try the second priority at an earlier position. | 3136 // Otherwise try the second priority at an earlier position. |
3258 macro_assembler->AdvanceCurrentPosition(-text_length); | 3137 macro_assembler->AdvanceCurrentPosition(-text_length); |
3259 macro_assembler->GoTo(&second_choice); | 3138 macro_assembler->GoTo(&second_choice); |
3260 return new_trace; | 3139 return new_trace; |
3261 } | 3140 } |
3262 | 3141 |
3263 | |
3264 intptr_t ChoiceNode::EmitOptimizedUnanchoredSearch(RegExpCompiler* compiler, | 3142 intptr_t ChoiceNode::EmitOptimizedUnanchoredSearch(RegExpCompiler* compiler, |
3265 Trace* trace) { | 3143 Trace* trace) { |
3266 intptr_t eats_at_least = PreloadState::kEatsAtLeastNotYetInitialized; | 3144 intptr_t eats_at_least = PreloadState::kEatsAtLeastNotYetInitialized; |
3267 if (alternatives_->length() != 2) return eats_at_least; | 3145 if (alternatives_->length() != 2) return eats_at_least; |
3268 | 3146 |
3269 GuardedAlternative alt1 = alternatives_->At(1); | 3147 GuardedAlternative alt1 = alternatives_->At(1); |
3270 if (alt1.guards() != NULL && alt1.guards()->length() != 0) { | 3148 if (alt1.guards() != NULL && alt1.guards()->length() != 0) { |
3271 return eats_at_least; | 3149 return eats_at_least; |
3272 } | 3150 } |
3273 RegExpNode* eats_anything_node = alt1.node(); | 3151 RegExpNode* eats_anything_node = alt1.node(); |
(...skipping 27 matching lines...) Expand all Loading... |
3301 GuardedAlternative alt0 = alternatives_->At(0); | 3179 GuardedAlternative alt0 = alternatives_->At(0); |
3302 alt0.node()->FillInBMInfo(0, kRecursionBudget, bm, false); | 3180 alt0.node()->FillInBMInfo(0, kRecursionBudget, bm, false); |
3303 } | 3181 } |
3304 } | 3182 } |
3305 if (bm != NULL) { | 3183 if (bm != NULL) { |
3306 bm->EmitSkipInstructions(macro_assembler); | 3184 bm->EmitSkipInstructions(macro_assembler); |
3307 } | 3185 } |
3308 return eats_at_least; | 3186 return eats_at_least; |
3309 } | 3187 } |
3310 | 3188 |
3311 | |
3312 void ChoiceNode::EmitChoices(RegExpCompiler* compiler, | 3189 void ChoiceNode::EmitChoices(RegExpCompiler* compiler, |
3313 AlternativeGenerationList* alt_gens, | 3190 AlternativeGenerationList* alt_gens, |
3314 intptr_t first_choice, | 3191 intptr_t first_choice, |
3315 Trace* trace, | 3192 Trace* trace, |
3316 PreloadState* preload) { | 3193 PreloadState* preload) { |
3317 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); | 3194 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); |
3318 SetUpPreLoad(compiler, trace, preload); | 3195 SetUpPreLoad(compiler, trace, preload); |
3319 | 3196 |
3320 // For now we just call all choices one after the other. The idea ultimately | 3197 // For now we just call all choices one after the other. The idea ultimately |
3321 // is to use the Dispatch table to try only the relevant ones. | 3198 // is to use the Dispatch table to try only the relevant ones. |
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3386 for (intptr_t j = 0; j < guard_count; j++) { | 3263 for (intptr_t j = 0; j < guard_count; j++) { |
3387 GenerateGuard(macro_assembler, guards->At(j), &new_trace); | 3264 GenerateGuard(macro_assembler, guards->At(j), &new_trace); |
3388 } | 3265 } |
3389 alternative.node()->Emit(compiler, &new_trace); | 3266 alternative.node()->Emit(compiler, &new_trace); |
3390 preload->preload_is_current_ = false; | 3267 preload->preload_is_current_ = false; |
3391 } | 3268 } |
3392 macro_assembler->BindBlock(&alt_gen->after); | 3269 macro_assembler->BindBlock(&alt_gen->after); |
3393 } | 3270 } |
3394 } | 3271 } |
3395 | 3272 |
3396 | |
3397 void ChoiceNode::EmitOutOfLineContinuation(RegExpCompiler* compiler, | 3273 void ChoiceNode::EmitOutOfLineContinuation(RegExpCompiler* compiler, |
3398 Trace* trace, | 3274 Trace* trace, |
3399 GuardedAlternative alternative, | 3275 GuardedAlternative alternative, |
3400 AlternativeGeneration* alt_gen, | 3276 AlternativeGeneration* alt_gen, |
3401 intptr_t preload_characters, | 3277 intptr_t preload_characters, |
3402 bool next_expects_preload) { | 3278 bool next_expects_preload) { |
3403 if (!alt_gen->possible_success.IsLinked()) return; | 3279 if (!alt_gen->possible_success.IsLinked()) return; |
3404 | 3280 |
3405 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); | 3281 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); |
3406 macro_assembler->BindBlock(&alt_gen->possible_success); | 3282 macro_assembler->BindBlock(&alt_gen->possible_success); |
(...skipping 19 matching lines...) Expand all Loading... |
3426 macro_assembler->GoTo(&(alt_gen->after)); | 3302 macro_assembler->GoTo(&(alt_gen->after)); |
3427 } else { | 3303 } else { |
3428 out_of_line_trace.set_backtrack(&(alt_gen->after)); | 3304 out_of_line_trace.set_backtrack(&(alt_gen->after)); |
3429 for (intptr_t j = 0; j < guard_count; j++) { | 3305 for (intptr_t j = 0; j < guard_count; j++) { |
3430 GenerateGuard(macro_assembler, guards->At(j), &out_of_line_trace); | 3306 GenerateGuard(macro_assembler, guards->At(j), &out_of_line_trace); |
3431 } | 3307 } |
3432 alternative.node()->Emit(compiler, &out_of_line_trace); | 3308 alternative.node()->Emit(compiler, &out_of_line_trace); |
3433 } | 3309 } |
3434 } | 3310 } |
3435 | 3311 |
3436 | |
3437 void ActionNode::Emit(RegExpCompiler* compiler, Trace* trace) { | 3312 void ActionNode::Emit(RegExpCompiler* compiler, Trace* trace) { |
3438 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 3313 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
3439 LimitResult limit_result = LimitVersions(compiler, trace); | 3314 LimitResult limit_result = LimitVersions(compiler, trace); |
3440 if (limit_result == DONE) return; | 3315 if (limit_result == DONE) return; |
3441 ASSERT(limit_result == CONTINUE); | 3316 ASSERT(limit_result == CONTINUE); |
3442 | 3317 |
3443 RecursionCheck rc(compiler); | 3318 RecursionCheck rc(compiler); |
3444 | 3319 |
3445 switch (action_type_) { | 3320 switch (action_type_) { |
3446 case STORE_POSITION: { | 3321 case STORE_POSITION: { |
(...skipping 100 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3547 | 3422 |
3548 ASSERT(trace->backtrack() == NULL); | 3423 ASSERT(trace->backtrack() == NULL); |
3549 assembler->Backtrack(); | 3424 assembler->Backtrack(); |
3550 return; | 3425 return; |
3551 } | 3426 } |
3552 default: | 3427 default: |
3553 UNREACHABLE(); | 3428 UNREACHABLE(); |
3554 } | 3429 } |
3555 } | 3430 } |
3556 | 3431 |
3557 | |
3558 void BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) { | 3432 void BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) { |
3559 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 3433 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
3560 if (!trace->is_trivial()) { | 3434 if (!trace->is_trivial()) { |
3561 trace->Flush(compiler, this); | 3435 trace->Flush(compiler, this); |
3562 return; | 3436 return; |
3563 } | 3437 } |
3564 | 3438 |
3565 LimitResult limit_result = LimitVersions(compiler, trace); | 3439 LimitResult limit_result = LimitVersions(compiler, trace); |
3566 if (limit_result == DONE) return; | 3440 if (limit_result == DONE) return; |
3567 ASSERT(limit_result == CONTINUE); | 3441 ASSERT(limit_result == CONTINUE); |
3568 | 3442 |
3569 RecursionCheck rc(compiler); | 3443 RecursionCheck rc(compiler); |
3570 | 3444 |
3571 ASSERT(start_reg_ + 1 == end_reg_); | 3445 ASSERT(start_reg_ + 1 == end_reg_); |
3572 if (compiler->ignore_case()) { | 3446 if (compiler->ignore_case()) { |
3573 assembler->CheckNotBackReferenceIgnoreCase(start_reg_, trace->backtrack()); | 3447 assembler->CheckNotBackReferenceIgnoreCase(start_reg_, trace->backtrack()); |
3574 } else { | 3448 } else { |
3575 assembler->CheckNotBackReference(start_reg_, trace->backtrack()); | 3449 assembler->CheckNotBackReference(start_reg_, trace->backtrack()); |
3576 } | 3450 } |
3577 on_success()->Emit(compiler, trace); | 3451 on_success()->Emit(compiler, trace); |
3578 } | 3452 } |
3579 | 3453 |
3580 | |
3581 // ------------------------------------------------------------------- | 3454 // ------------------------------------------------------------------- |
3582 // Dot/dotty output | 3455 // Dot/dotty output |
3583 | 3456 |
3584 | |
3585 #ifdef DEBUG | 3457 #ifdef DEBUG |
3586 | 3458 |
3587 | |
3588 class DotPrinter : public NodeVisitor { | 3459 class DotPrinter : public NodeVisitor { |
3589 public: | 3460 public: |
3590 explicit DotPrinter(bool ignore_case) {} | 3461 explicit DotPrinter(bool ignore_case) {} |
3591 void PrintNode(const char* label, RegExpNode* node); | 3462 void PrintNode(const char* label, RegExpNode* node); |
3592 void Visit(RegExpNode* node); | 3463 void Visit(RegExpNode* node); |
3593 void PrintAttributes(RegExpNode* from); | 3464 void PrintAttributes(RegExpNode* from); |
3594 void PrintOnFailure(RegExpNode* from, RegExpNode* to); | 3465 void PrintOnFailure(RegExpNode* from, RegExpNode* to); |
3595 #define DECLARE_VISIT(Type) virtual void Visit##Type(Type##Node* that); | 3466 #define DECLARE_VISIT(Type) virtual void Visit##Type(Type##Node* that); |
3596 FOR_EACH_NODE_TYPE(DECLARE_VISIT) | 3467 FOR_EACH_NODE_TYPE(DECLARE_VISIT) |
3597 #undef DECLARE_VISIT | 3468 #undef DECLARE_VISIT |
3598 }; | 3469 }; |
3599 | 3470 |
3600 | |
3601 void DotPrinter::PrintNode(const char* label, RegExpNode* node) { | 3471 void DotPrinter::PrintNode(const char* label, RegExpNode* node) { |
3602 OS::Print("digraph G {\n graph [label=\""); | 3472 OS::Print("digraph G {\n graph [label=\""); |
3603 for (intptr_t i = 0; label[i]; i++) { | 3473 for (intptr_t i = 0; label[i]; i++) { |
3604 switch (label[i]) { | 3474 switch (label[i]) { |
3605 case '\\': | 3475 case '\\': |
3606 OS::Print("\\\\"); | 3476 OS::Print("\\\\"); |
3607 break; | 3477 break; |
3608 case '"': | 3478 case '"': |
3609 OS::Print("\""); | 3479 OS::Print("\""); |
3610 break; | 3480 break; |
3611 default: | 3481 default: |
3612 OS::Print("%c", label[i]); | 3482 OS::Print("%c", label[i]); |
3613 break; | 3483 break; |
3614 } | 3484 } |
3615 } | 3485 } |
3616 OS::Print("\"];\n"); | 3486 OS::Print("\"];\n"); |
3617 Visit(node); | 3487 Visit(node); |
3618 OS::Print("}\n"); | 3488 OS::Print("}\n"); |
3619 } | 3489 } |
3620 | 3490 |
3621 | |
3622 void DotPrinter::Visit(RegExpNode* node) { | 3491 void DotPrinter::Visit(RegExpNode* node) { |
3623 if (node->info()->visited) return; | 3492 if (node->info()->visited) return; |
3624 node->info()->visited = true; | 3493 node->info()->visited = true; |
3625 node->Accept(this); | 3494 node->Accept(this); |
3626 } | 3495 } |
3627 | 3496 |
3628 | |
3629 void DotPrinter::PrintOnFailure(RegExpNode* from, RegExpNode* on_failure) { | 3497 void DotPrinter::PrintOnFailure(RegExpNode* from, RegExpNode* on_failure) { |
3630 OS::Print(" n%p -> n%p [style=dotted];\n", from, on_failure); | 3498 OS::Print(" n%p -> n%p [style=dotted];\n", from, on_failure); |
3631 Visit(on_failure); | 3499 Visit(on_failure); |
3632 } | 3500 } |
3633 | 3501 |
3634 | |
3635 class AttributePrinter : public ValueObject { | 3502 class AttributePrinter : public ValueObject { |
3636 public: | 3503 public: |
3637 AttributePrinter() : first_(true) {} | 3504 AttributePrinter() : first_(true) {} |
3638 void PrintSeparator() { | 3505 void PrintSeparator() { |
3639 if (first_) { | 3506 if (first_) { |
3640 first_ = false; | 3507 first_ = false; |
3641 } else { | 3508 } else { |
3642 OS::Print("|"); | 3509 OS::Print("|"); |
3643 } | 3510 } |
3644 } | 3511 } |
3645 void PrintBit(const char* name, bool value) { | 3512 void PrintBit(const char* name, bool value) { |
3646 if (!value) return; | 3513 if (!value) return; |
3647 PrintSeparator(); | 3514 PrintSeparator(); |
3648 OS::Print("{%s}", name); | 3515 OS::Print("{%s}", name); |
3649 } | 3516 } |
3650 void PrintPositive(const char* name, intptr_t value) { | 3517 void PrintPositive(const char* name, intptr_t value) { |
3651 if (value < 0) return; | 3518 if (value < 0) return; |
3652 PrintSeparator(); | 3519 PrintSeparator(); |
3653 OS::Print("{%s|%" Pd "}", name, value); | 3520 OS::Print("{%s|%" Pd "}", name, value); |
3654 } | 3521 } |
3655 | 3522 |
3656 private: | 3523 private: |
3657 bool first_; | 3524 bool first_; |
3658 }; | 3525 }; |
3659 | 3526 |
3660 | |
3661 void DotPrinter::PrintAttributes(RegExpNode* that) { | 3527 void DotPrinter::PrintAttributes(RegExpNode* that) { |
3662 OS::Print( | 3528 OS::Print( |
3663 " a%p [shape=Mrecord, color=grey, fontcolor=grey, " | 3529 " a%p [shape=Mrecord, color=grey, fontcolor=grey, " |
3664 "margin=0.1, fontsize=10, label=\"{", | 3530 "margin=0.1, fontsize=10, label=\"{", |
3665 that); | 3531 that); |
3666 AttributePrinter printer; | 3532 AttributePrinter printer; |
3667 NodeInfo* info = that->info(); | 3533 NodeInfo* info = that->info(); |
3668 printer.PrintBit("NI", info->follows_newline_interest); | 3534 printer.PrintBit("NI", info->follows_newline_interest); |
3669 printer.PrintBit("WI", info->follows_word_interest); | 3535 printer.PrintBit("WI", info->follows_word_interest); |
3670 printer.PrintBit("SI", info->follows_start_interest); | 3536 printer.PrintBit("SI", info->follows_start_interest); |
3671 BlockLabel* label = that->label(); | 3537 BlockLabel* label = that->label(); |
3672 if (label->IsBound()) printer.PrintPositive("@", label->Position()); | 3538 if (label->IsBound()) printer.PrintPositive("@", label->Position()); |
3673 OS::Print( | 3539 OS::Print( |
3674 "}\"];\n" | 3540 "}\"];\n" |
3675 " a%p -> n%p [style=dashed, color=grey, arrowhead=none];\n", | 3541 " a%p -> n%p [style=dashed, color=grey, arrowhead=none];\n", |
3676 that, that); | 3542 that, that); |
3677 } | 3543 } |
3678 | 3544 |
3679 | |
3680 void DotPrinter::VisitChoice(ChoiceNode* that) { | 3545 void DotPrinter::VisitChoice(ChoiceNode* that) { |
3681 OS::Print(" n%p [shape=Mrecord, label=\"?\"];\n", that); | 3546 OS::Print(" n%p [shape=Mrecord, label=\"?\"];\n", that); |
3682 for (intptr_t i = 0; i < that->alternatives()->length(); i++) { | 3547 for (intptr_t i = 0; i < that->alternatives()->length(); i++) { |
3683 GuardedAlternative alt = that->alternatives()->At(i); | 3548 GuardedAlternative alt = that->alternatives()->At(i); |
3684 OS::Print(" n%p -> n%p", that, alt.node()); | 3549 OS::Print(" n%p -> n%p", that, alt.node()); |
3685 } | 3550 } |
3686 for (intptr_t i = 0; i < that->alternatives()->length(); i++) { | 3551 for (intptr_t i = 0; i < that->alternatives()->length(); i++) { |
3687 GuardedAlternative alt = that->alternatives()->At(i); | 3552 GuardedAlternative alt = that->alternatives()->At(i); |
3688 alt.node()->Accept(this); | 3553 alt.node()->Accept(this); |
3689 } | 3554 } |
3690 } | 3555 } |
3691 | 3556 |
3692 | |
3693 void DotPrinter::VisitText(TextNode* that) { | 3557 void DotPrinter::VisitText(TextNode* that) { |
3694 OS::Print(" n%p [label=\"", that); | 3558 OS::Print(" n%p [label=\"", that); |
3695 for (intptr_t i = 0; i < that->elements()->length(); i++) { | 3559 for (intptr_t i = 0; i < that->elements()->length(); i++) { |
3696 if (i > 0) OS::Print(" "); | 3560 if (i > 0) OS::Print(" "); |
3697 TextElement elm = that->elements()->At(i); | 3561 TextElement elm = that->elements()->At(i); |
3698 switch (elm.text_type()) { | 3562 switch (elm.text_type()) { |
3699 case TextElement::ATOM: { | 3563 case TextElement::ATOM: { |
3700 ZoneGrowableArray<uint16_t>* data = elm.atom()->data(); | 3564 ZoneGrowableArray<uint16_t>* data = elm.atom()->data(); |
3701 for (intptr_t i = 0; i < data->length(); i++) { | 3565 for (intptr_t i = 0; i < data->length(); i++) { |
3702 OS::Print("%c", static_cast<char>(data->At(i))); | 3566 OS::Print("%c", static_cast<char>(data->At(i))); |
(...skipping 16 matching lines...) Expand all Loading... |
3719 default: | 3583 default: |
3720 UNREACHABLE(); | 3584 UNREACHABLE(); |
3721 } | 3585 } |
3722 } | 3586 } |
3723 OS::Print("\", shape=box, peripheries=2];\n"); | 3587 OS::Print("\", shape=box, peripheries=2];\n"); |
3724 PrintAttributes(that); | 3588 PrintAttributes(that); |
3725 OS::Print(" n%p -> n%p;\n", that, that->on_success()); | 3589 OS::Print(" n%p -> n%p;\n", that, that->on_success()); |
3726 Visit(that->on_success()); | 3590 Visit(that->on_success()); |
3727 } | 3591 } |
3728 | 3592 |
3729 | |
3730 void DotPrinter::VisitBackReference(BackReferenceNode* that) { | 3593 void DotPrinter::VisitBackReference(BackReferenceNode* that) { |
3731 OS::Print(" n%p [label=\"$%" Pd "..$%" Pd "\", shape=doubleoctagon];\n", | 3594 OS::Print(" n%p [label=\"$%" Pd "..$%" Pd "\", shape=doubleoctagon];\n", |
3732 that, that->start_register(), that->end_register()); | 3595 that, that->start_register(), that->end_register()); |
3733 PrintAttributes(that); | 3596 PrintAttributes(that); |
3734 OS::Print(" n%p -> n%p;\n", that, that->on_success()); | 3597 OS::Print(" n%p -> n%p;\n", that, that->on_success()); |
3735 Visit(that->on_success()); | 3598 Visit(that->on_success()); |
3736 } | 3599 } |
3737 | 3600 |
3738 | |
3739 void DotPrinter::VisitEnd(EndNode* that) { | 3601 void DotPrinter::VisitEnd(EndNode* that) { |
3740 OS::Print(" n%p [style=bold, shape=point];\n", that); | 3602 OS::Print(" n%p [style=bold, shape=point];\n", that); |
3741 PrintAttributes(that); | 3603 PrintAttributes(that); |
3742 } | 3604 } |
3743 | 3605 |
3744 | |
3745 void DotPrinter::VisitAssertion(AssertionNode* that) { | 3606 void DotPrinter::VisitAssertion(AssertionNode* that) { |
3746 OS::Print(" n%p [", that); | 3607 OS::Print(" n%p [", that); |
3747 switch (that->assertion_type()) { | 3608 switch (that->assertion_type()) { |
3748 case AssertionNode::AT_END: | 3609 case AssertionNode::AT_END: |
3749 OS::Print("label=\"$\", shape=septagon"); | 3610 OS::Print("label=\"$\", shape=septagon"); |
3750 break; | 3611 break; |
3751 case AssertionNode::AT_START: | 3612 case AssertionNode::AT_START: |
3752 OS::Print("label=\"^\", shape=septagon"); | 3613 OS::Print("label=\"^\", shape=septagon"); |
3753 break; | 3614 break; |
3754 case AssertionNode::AT_BOUNDARY: | 3615 case AssertionNode::AT_BOUNDARY: |
3755 OS::Print("label=\"\\b\", shape=septagon"); | 3616 OS::Print("label=\"\\b\", shape=septagon"); |
3756 break; | 3617 break; |
3757 case AssertionNode::AT_NON_BOUNDARY: | 3618 case AssertionNode::AT_NON_BOUNDARY: |
3758 OS::Print("label=\"\\B\", shape=septagon"); | 3619 OS::Print("label=\"\\B\", shape=septagon"); |
3759 break; | 3620 break; |
3760 case AssertionNode::AFTER_NEWLINE: | 3621 case AssertionNode::AFTER_NEWLINE: |
3761 OS::Print("label=\"(?<=\\n)\", shape=septagon"); | 3622 OS::Print("label=\"(?<=\\n)\", shape=septagon"); |
3762 break; | 3623 break; |
3763 } | 3624 } |
3764 OS::Print("];\n"); | 3625 OS::Print("];\n"); |
3765 PrintAttributes(that); | 3626 PrintAttributes(that); |
3766 RegExpNode* successor = that->on_success(); | 3627 RegExpNode* successor = that->on_success(); |
3767 OS::Print(" n%p -> n%p;\n", that, successor); | 3628 OS::Print(" n%p -> n%p;\n", that, successor); |
3768 Visit(successor); | 3629 Visit(successor); |
3769 } | 3630 } |
3770 | 3631 |
3771 | |
3772 void DotPrinter::VisitAction(ActionNode* that) { | 3632 void DotPrinter::VisitAction(ActionNode* that) { |
3773 OS::Print(" n%p [", that); | 3633 OS::Print(" n%p [", that); |
3774 switch (that->action_type_) { | 3634 switch (that->action_type_) { |
3775 case ActionNode::SET_REGISTER: | 3635 case ActionNode::SET_REGISTER: |
3776 OS::Print("label=\"$%" Pd ":=%" Pd "\", shape=octagon", | 3636 OS::Print("label=\"$%" Pd ":=%" Pd "\", shape=octagon", |
3777 that->data_.u_store_register.reg, | 3637 that->data_.u_store_register.reg, |
3778 that->data_.u_store_register.value); | 3638 that->data_.u_store_register.value); |
3779 break; | 3639 break; |
3780 case ActionNode::INCREMENT_REGISTER: | 3640 case ActionNode::INCREMENT_REGISTER: |
3781 OS::Print("label=\"$%" Pd "++\", shape=octagon", | 3641 OS::Print("label=\"$%" Pd "++\", shape=octagon", |
(...skipping 23 matching lines...) Expand all Loading... |
3805 break; | 3665 break; |
3806 } | 3666 } |
3807 } | 3667 } |
3808 OS::Print("];\n"); | 3668 OS::Print("];\n"); |
3809 PrintAttributes(that); | 3669 PrintAttributes(that); |
3810 RegExpNode* successor = that->on_success(); | 3670 RegExpNode* successor = that->on_success(); |
3811 OS::Print(" n%p -> n%p;\n", that, successor); | 3671 OS::Print(" n%p -> n%p;\n", that, successor); |
3812 Visit(successor); | 3672 Visit(successor); |
3813 } | 3673 } |
3814 | 3674 |
3815 | |
3816 void RegExpEngine::DotPrint(const char* label, | 3675 void RegExpEngine::DotPrint(const char* label, |
3817 RegExpNode* node, | 3676 RegExpNode* node, |
3818 bool ignore_case) { | 3677 bool ignore_case) { |
3819 DotPrinter printer(ignore_case); | 3678 DotPrinter printer(ignore_case); |
3820 printer.PrintNode(label, node); | 3679 printer.PrintNode(label, node); |
3821 } | 3680 } |
3822 | 3681 |
3823 | |
3824 #endif // DEBUG | 3682 #endif // DEBUG |
3825 | 3683 |
3826 | |
3827 // ------------------------------------------------------------------- | 3684 // ------------------------------------------------------------------- |
3828 // Tree to graph conversion | 3685 // Tree to graph conversion |
3829 | 3686 |
3830 // The zone in which we allocate graph nodes. | 3687 // The zone in which we allocate graph nodes. |
3831 #define OZ (on_success->zone()) | 3688 #define OZ (on_success->zone()) |
3832 | 3689 |
3833 RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler, | 3690 RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler, |
3834 RegExpNode* on_success) { | 3691 RegExpNode* on_success) { |
3835 ZoneGrowableArray<TextElement>* elms = | 3692 ZoneGrowableArray<TextElement>* elms = |
3836 new (OZ) ZoneGrowableArray<TextElement>(1); | 3693 new (OZ) ZoneGrowableArray<TextElement>(1); |
3837 elms->Add(TextElement::Atom(this)); | 3694 elms->Add(TextElement::Atom(this)); |
3838 return new (OZ) TextNode(elms, on_success); | 3695 return new (OZ) TextNode(elms, on_success); |
3839 } | 3696 } |
3840 | 3697 |
3841 | |
3842 RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler, | 3698 RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler, |
3843 RegExpNode* on_success) { | 3699 RegExpNode* on_success) { |
3844 ZoneGrowableArray<TextElement>* elms = | 3700 ZoneGrowableArray<TextElement>* elms = |
3845 new (OZ) ZoneGrowableArray<TextElement>(1); | 3701 new (OZ) ZoneGrowableArray<TextElement>(1); |
3846 for (intptr_t i = 0; i < elements()->length(); i++) { | 3702 for (intptr_t i = 0; i < elements()->length(); i++) { |
3847 elms->Add(elements()->At(i)); | 3703 elms->Add(elements()->At(i)); |
3848 } | 3704 } |
3849 return new (OZ) TextNode(elms, on_success); | 3705 return new (OZ) TextNode(elms, on_success); |
3850 } | 3706 } |
3851 | 3707 |
3852 | |
3853 static bool CompareInverseRanges(ZoneGrowableArray<CharacterRange>* ranges, | 3708 static bool CompareInverseRanges(ZoneGrowableArray<CharacterRange>* ranges, |
3854 const intptr_t* special_class, | 3709 const intptr_t* special_class, |
3855 intptr_t length) { | 3710 intptr_t length) { |
3856 length--; // Remove final 0x10000. | 3711 length--; // Remove final 0x10000. |
3857 ASSERT(special_class[length] == 0x10000); | 3712 ASSERT(special_class[length] == 0x10000); |
3858 ASSERT(ranges->length() != 0); | 3713 ASSERT(ranges->length() != 0); |
3859 ASSERT(length != 0); | 3714 ASSERT(length != 0); |
3860 ASSERT(special_class[0] != 0); | 3715 ASSERT(special_class[0] != 0); |
3861 if (ranges->length() != (length >> 1) + 1) { | 3716 if (ranges->length() != (length >> 1) + 1) { |
3862 return false; | 3717 return false; |
(...skipping 10 matching lines...) Expand all Loading... |
3873 if (special_class[i + 1] != range.from()) { | 3728 if (special_class[i + 1] != range.from()) { |
3874 return false; | 3729 return false; |
3875 } | 3730 } |
3876 } | 3731 } |
3877 if (range.to() != 0xffff) { | 3732 if (range.to() != 0xffff) { |
3878 return false; | 3733 return false; |
3879 } | 3734 } |
3880 return true; | 3735 return true; |
3881 } | 3736 } |
3882 | 3737 |
3883 | |
3884 static bool CompareRanges(ZoneGrowableArray<CharacterRange>* ranges, | 3738 static bool CompareRanges(ZoneGrowableArray<CharacterRange>* ranges, |
3885 const intptr_t* special_class, | 3739 const intptr_t* special_class, |
3886 intptr_t length) { | 3740 intptr_t length) { |
3887 length--; // Remove final 0x10000. | 3741 length--; // Remove final 0x10000. |
3888 ASSERT(special_class[length] == 0x10000); | 3742 ASSERT(special_class[length] == 0x10000); |
3889 if (ranges->length() * 2 != length) { | 3743 if (ranges->length() * 2 != length) { |
3890 return false; | 3744 return false; |
3891 } | 3745 } |
3892 for (intptr_t i = 0; i < length; i += 2) { | 3746 for (intptr_t i = 0; i < length; i += 2) { |
3893 CharacterRange range = ranges->At(i >> 1); | 3747 CharacterRange range = ranges->At(i >> 1); |
3894 if (range.from() != special_class[i] || | 3748 if (range.from() != special_class[i] || |
3895 range.to() != special_class[i + 1] - 1) { | 3749 range.to() != special_class[i + 1] - 1) { |
3896 return false; | 3750 return false; |
3897 } | 3751 } |
3898 } | 3752 } |
3899 return true; | 3753 return true; |
3900 } | 3754 } |
3901 | 3755 |
3902 | |
3903 bool RegExpCharacterClass::is_standard() { | 3756 bool RegExpCharacterClass::is_standard() { |
3904 // TODO(lrn): Remove need for this function, by not throwing away information | 3757 // TODO(lrn): Remove need for this function, by not throwing away information |
3905 // along the way. | 3758 // along the way. |
3906 if (is_negated_) { | 3759 if (is_negated_) { |
3907 return false; | 3760 return false; |
3908 } | 3761 } |
3909 if (set_.is_standard()) { | 3762 if (set_.is_standard()) { |
3910 return true; | 3763 return true; |
3911 } | 3764 } |
3912 if (CompareRanges(set_.ranges(), kSpaceRanges, kSpaceRangeCount)) { | 3765 if (CompareRanges(set_.ranges(), kSpaceRanges, kSpaceRangeCount)) { |
(...skipping 18 matching lines...) Expand all Loading... |
3931 set_.set_standard_set_type('w'); | 3784 set_.set_standard_set_type('w'); |
3932 return true; | 3785 return true; |
3933 } | 3786 } |
3934 if (CompareInverseRanges(set_.ranges(), kWordRanges, kWordRangeCount)) { | 3787 if (CompareInverseRanges(set_.ranges(), kWordRanges, kWordRangeCount)) { |
3935 set_.set_standard_set_type('W'); | 3788 set_.set_standard_set_type('W'); |
3936 return true; | 3789 return true; |
3937 } | 3790 } |
3938 return false; | 3791 return false; |
3939 } | 3792 } |
3940 | 3793 |
3941 | |
3942 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, | 3794 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, |
3943 RegExpNode* on_success) { | 3795 RegExpNode* on_success) { |
3944 return new (OZ) TextNode(this, on_success); | 3796 return new (OZ) TextNode(this, on_success); |
3945 } | 3797 } |
3946 | 3798 |
3947 | |
3948 RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler, | 3799 RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler, |
3949 RegExpNode* on_success) { | 3800 RegExpNode* on_success) { |
3950 ZoneGrowableArray<RegExpTree*>* alternatives = this->alternatives(); | 3801 ZoneGrowableArray<RegExpTree*>* alternatives = this->alternatives(); |
3951 intptr_t length = alternatives->length(); | 3802 intptr_t length = alternatives->length(); |
3952 ChoiceNode* result = new (OZ) ChoiceNode(length, OZ); | 3803 ChoiceNode* result = new (OZ) ChoiceNode(length, OZ); |
3953 for (intptr_t i = 0; i < length; i++) { | 3804 for (intptr_t i = 0; i < length; i++) { |
3954 GuardedAlternative alternative( | 3805 GuardedAlternative alternative( |
3955 alternatives->At(i)->ToNode(compiler, on_success)); | 3806 alternatives->At(i)->ToNode(compiler, on_success)); |
3956 result->AddAlternative(alternative); | 3807 result->AddAlternative(alternative); |
3957 } | 3808 } |
3958 return result; | 3809 return result; |
3959 } | 3810 } |
3960 | 3811 |
3961 | |
3962 RegExpNode* RegExpQuantifier::ToNode(RegExpCompiler* compiler, | 3812 RegExpNode* RegExpQuantifier::ToNode(RegExpCompiler* compiler, |
3963 RegExpNode* on_success) { | 3813 RegExpNode* on_success) { |
3964 return ToNode(min(), max(), is_greedy(), body(), compiler, on_success); | 3814 return ToNode(min(), max(), is_greedy(), body(), compiler, on_success); |
3965 } | 3815 } |
3966 | 3816 |
3967 | |
3968 // Scoped object to keep track of how much we unroll quantifier loops in the | 3817 // Scoped object to keep track of how much we unroll quantifier loops in the |
3969 // regexp graph generator. | 3818 // regexp graph generator. |
3970 class RegExpExpansionLimiter : public ValueObject { | 3819 class RegExpExpansionLimiter : public ValueObject { |
3971 public: | 3820 public: |
3972 static const intptr_t kMaxExpansionFactor = 6; | 3821 static const intptr_t kMaxExpansionFactor = 6; |
3973 RegExpExpansionLimiter(RegExpCompiler* compiler, intptr_t factor) | 3822 RegExpExpansionLimiter(RegExpCompiler* compiler, intptr_t factor) |
3974 : compiler_(compiler), | 3823 : compiler_(compiler), |
3975 saved_expansion_factor_(compiler->current_expansion_factor()), | 3824 saved_expansion_factor_(compiler->current_expansion_factor()), |
3976 ok_to_expand_(saved_expansion_factor_ <= kMaxExpansionFactor) { | 3825 ok_to_expand_(saved_expansion_factor_ <= kMaxExpansionFactor) { |
3977 ASSERT(factor > 0); | 3826 ASSERT(factor > 0); |
(...skipping 17 matching lines...) Expand all Loading... |
3995 bool ok_to_expand() { return ok_to_expand_; } | 3844 bool ok_to_expand() { return ok_to_expand_; } |
3996 | 3845 |
3997 private: | 3846 private: |
3998 RegExpCompiler* compiler_; | 3847 RegExpCompiler* compiler_; |
3999 intptr_t saved_expansion_factor_; | 3848 intptr_t saved_expansion_factor_; |
4000 bool ok_to_expand_; | 3849 bool ok_to_expand_; |
4001 | 3850 |
4002 DISALLOW_IMPLICIT_CONSTRUCTORS(RegExpExpansionLimiter); | 3851 DISALLOW_IMPLICIT_CONSTRUCTORS(RegExpExpansionLimiter); |
4003 }; | 3852 }; |
4004 | 3853 |
4005 | |
4006 RegExpNode* RegExpQuantifier::ToNode(intptr_t min, | 3854 RegExpNode* RegExpQuantifier::ToNode(intptr_t min, |
4007 intptr_t max, | 3855 intptr_t max, |
4008 bool is_greedy, | 3856 bool is_greedy, |
4009 RegExpTree* body, | 3857 RegExpTree* body, |
4010 RegExpCompiler* compiler, | 3858 RegExpCompiler* compiler, |
4011 RegExpNode* on_success, | 3859 RegExpNode* on_success, |
4012 bool not_at_start) { | 3860 bool not_at_start) { |
4013 // x{f, t} becomes this: | 3861 // x{f, t} becomes this: |
4014 // | 3862 // |
4015 // (r++)<-. | 3863 // (r++)<-. |
(...skipping 116 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4132 center->AddContinueAlternative(rest_alt); | 3980 center->AddContinueAlternative(rest_alt); |
4133 center->AddLoopAlternative(body_alt); | 3981 center->AddLoopAlternative(body_alt); |
4134 } | 3982 } |
4135 if (needs_counter) { | 3983 if (needs_counter) { |
4136 return ActionNode::SetRegister(reg_ctr, 0, center); | 3984 return ActionNode::SetRegister(reg_ctr, 0, center); |
4137 } else { | 3985 } else { |
4138 return center; | 3986 return center; |
4139 } | 3987 } |
4140 } | 3988 } |
4141 | 3989 |
4142 | |
4143 RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler, | 3990 RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler, |
4144 RegExpNode* on_success) { | 3991 RegExpNode* on_success) { |
4145 switch (assertion_type()) { | 3992 switch (assertion_type()) { |
4146 case START_OF_LINE: | 3993 case START_OF_LINE: |
4147 return AssertionNode::AfterNewline(on_success); | 3994 return AssertionNode::AfterNewline(on_success); |
4148 case START_OF_INPUT: | 3995 case START_OF_INPUT: |
4149 return AssertionNode::AtStart(on_success); | 3996 return AssertionNode::AtStart(on_success); |
4150 case BOUNDARY: | 3997 case BOUNDARY: |
4151 return AssertionNode::AtBoundary(on_success); | 3998 return AssertionNode::AtBoundary(on_success); |
4152 case NON_BOUNDARY: | 3999 case NON_BOUNDARY: |
(...skipping 28 matching lines...) Expand all Loading... |
4181 GuardedAlternative end_alternative(AssertionNode::AtEnd(on_success)); | 4028 GuardedAlternative end_alternative(AssertionNode::AtEnd(on_success)); |
4182 result->AddAlternative(end_alternative); | 4029 result->AddAlternative(end_alternative); |
4183 return result; | 4030 return result; |
4184 } | 4031 } |
4185 default: | 4032 default: |
4186 UNREACHABLE(); | 4033 UNREACHABLE(); |
4187 } | 4034 } |
4188 return on_success; | 4035 return on_success; |
4189 } | 4036 } |
4190 | 4037 |
4191 | |
4192 RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler, | 4038 RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler, |
4193 RegExpNode* on_success) { | 4039 RegExpNode* on_success) { |
4194 return new (OZ) | 4040 return new (OZ) |
4195 BackReferenceNode(RegExpCapture::StartRegister(index()), | 4041 BackReferenceNode(RegExpCapture::StartRegister(index()), |
4196 RegExpCapture::EndRegister(index()), on_success); | 4042 RegExpCapture::EndRegister(index()), on_success); |
4197 } | 4043 } |
4198 | 4044 |
4199 | |
4200 RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler, | 4045 RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler, |
4201 RegExpNode* on_success) { | 4046 RegExpNode* on_success) { |
4202 return on_success; | 4047 return on_success; |
4203 } | 4048 } |
4204 | 4049 |
4205 | |
4206 RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler, | 4050 RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler, |
4207 RegExpNode* on_success) { | 4051 RegExpNode* on_success) { |
4208 intptr_t stack_pointer_register = compiler->AllocateRegister(); | 4052 intptr_t stack_pointer_register = compiler->AllocateRegister(); |
4209 intptr_t position_register = compiler->AllocateRegister(); | 4053 intptr_t position_register = compiler->AllocateRegister(); |
4210 | 4054 |
4211 const intptr_t registers_per_capture = 2; | 4055 const intptr_t registers_per_capture = 2; |
4212 const intptr_t register_of_first_capture = 2; | 4056 const intptr_t register_of_first_capture = 2; |
4213 intptr_t register_count = capture_count_ * registers_per_capture; | 4057 intptr_t register_count = capture_count_ * registers_per_capture; |
4214 intptr_t register_start = | 4058 intptr_t register_start = |
4215 register_of_first_capture + capture_from_ * registers_per_capture; | 4059 register_of_first_capture + capture_from_ * registers_per_capture; |
(...skipping 23 matching lines...) Expand all Loading... |
4239 body()->ToNode(compiler, success = new (OZ) NegativeSubmatchSuccess( | 4083 body()->ToNode(compiler, success = new (OZ) NegativeSubmatchSuccess( |
4240 stack_pointer_register, position_register, | 4084 stack_pointer_register, position_register, |
4241 register_count, register_start, OZ))); | 4085 register_count, register_start, OZ))); |
4242 ChoiceNode* choice_node = new (OZ) NegativeLookaheadChoiceNode( | 4086 ChoiceNode* choice_node = new (OZ) NegativeLookaheadChoiceNode( |
4243 body_alt, GuardedAlternative(on_success), OZ); | 4087 body_alt, GuardedAlternative(on_success), OZ); |
4244 return ActionNode::BeginSubmatch(stack_pointer_register, position_register, | 4088 return ActionNode::BeginSubmatch(stack_pointer_register, position_register, |
4245 choice_node); | 4089 choice_node); |
4246 } | 4090 } |
4247 } | 4091 } |
4248 | 4092 |
4249 | |
4250 RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler, | 4093 RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler, |
4251 RegExpNode* on_success) { | 4094 RegExpNode* on_success) { |
4252 return ToNode(body(), index(), compiler, on_success); | 4095 return ToNode(body(), index(), compiler, on_success); |
4253 } | 4096 } |
4254 | 4097 |
4255 | |
4256 RegExpNode* RegExpCapture::ToNode(RegExpTree* body, | 4098 RegExpNode* RegExpCapture::ToNode(RegExpTree* body, |
4257 intptr_t index, | 4099 intptr_t index, |
4258 RegExpCompiler* compiler, | 4100 RegExpCompiler* compiler, |
4259 RegExpNode* on_success) { | 4101 RegExpNode* on_success) { |
4260 intptr_t start_reg = RegExpCapture::StartRegister(index); | 4102 intptr_t start_reg = RegExpCapture::StartRegister(index); |
4261 intptr_t end_reg = RegExpCapture::EndRegister(index); | 4103 intptr_t end_reg = RegExpCapture::EndRegister(index); |
4262 RegExpNode* store_end = ActionNode::StorePosition(end_reg, true, on_success); | 4104 RegExpNode* store_end = ActionNode::StorePosition(end_reg, true, on_success); |
4263 RegExpNode* body_node = body->ToNode(compiler, store_end); | 4105 RegExpNode* body_node = body->ToNode(compiler, store_end); |
4264 return ActionNode::StorePosition(start_reg, true, body_node); | 4106 return ActionNode::StorePosition(start_reg, true, body_node); |
4265 } | 4107 } |
4266 | 4108 |
4267 | |
4268 RegExpNode* RegExpAlternative::ToNode(RegExpCompiler* compiler, | 4109 RegExpNode* RegExpAlternative::ToNode(RegExpCompiler* compiler, |
4269 RegExpNode* on_success) { | 4110 RegExpNode* on_success) { |
4270 ZoneGrowableArray<RegExpTree*>* children = nodes(); | 4111 ZoneGrowableArray<RegExpTree*>* children = nodes(); |
4271 RegExpNode* current = on_success; | 4112 RegExpNode* current = on_success; |
4272 for (intptr_t i = children->length() - 1; i >= 0; i--) { | 4113 for (intptr_t i = children->length() - 1; i >= 0; i--) { |
4273 current = children->At(i)->ToNode(compiler, current); | 4114 current = children->At(i)->ToNode(compiler, current); |
4274 } | 4115 } |
4275 return current; | 4116 return current; |
4276 } | 4117 } |
4277 | 4118 |
4278 | |
4279 static void AddClass(const intptr_t* elmv, | 4119 static void AddClass(const intptr_t* elmv, |
4280 intptr_t elmc, | 4120 intptr_t elmc, |
4281 ZoneGrowableArray<CharacterRange>* ranges) { | 4121 ZoneGrowableArray<CharacterRange>* ranges) { |
4282 elmc--; | 4122 elmc--; |
4283 ASSERT(elmv[elmc] == 0x10000); | 4123 ASSERT(elmv[elmc] == 0x10000); |
4284 for (intptr_t i = 0; i < elmc; i += 2) { | 4124 for (intptr_t i = 0; i < elmc; i += 2) { |
4285 ASSERT(elmv[i] < elmv[i + 1]); | 4125 ASSERT(elmv[i] < elmv[i + 1]); |
4286 ranges->Add(CharacterRange(elmv[i], elmv[i + 1] - 1)); | 4126 ranges->Add(CharacterRange(elmv[i], elmv[i + 1] - 1)); |
4287 } | 4127 } |
4288 } | 4128 } |
4289 | 4129 |
4290 | |
4291 static void AddClassNegated(const intptr_t* elmv, | 4130 static void AddClassNegated(const intptr_t* elmv, |
4292 intptr_t elmc, | 4131 intptr_t elmc, |
4293 ZoneGrowableArray<CharacterRange>* ranges) { | 4132 ZoneGrowableArray<CharacterRange>* ranges) { |
4294 elmc--; | 4133 elmc--; |
4295 ASSERT(elmv[elmc] == 0x10000); | 4134 ASSERT(elmv[elmc] == 0x10000); |
4296 ASSERT(elmv[0] != 0x0000); | 4135 ASSERT(elmv[0] != 0x0000); |
4297 ASSERT(elmv[elmc - 1] != Utf16::kMaxCodeUnit); | 4136 ASSERT(elmv[elmc - 1] != Utf16::kMaxCodeUnit); |
4298 uint16_t last = 0x0000; | 4137 uint16_t last = 0x0000; |
4299 for (intptr_t i = 0; i < elmc; i += 2) { | 4138 for (intptr_t i = 0; i < elmc; i += 2) { |
4300 ASSERT(last <= elmv[i] - 1); | 4139 ASSERT(last <= elmv[i] - 1); |
4301 ASSERT(elmv[i] < elmv[i + 1]); | 4140 ASSERT(elmv[i] < elmv[i + 1]); |
4302 ranges->Add(CharacterRange(last, elmv[i] - 1)); | 4141 ranges->Add(CharacterRange(last, elmv[i] - 1)); |
4303 last = elmv[i + 1]; | 4142 last = elmv[i + 1]; |
4304 } | 4143 } |
4305 ranges->Add(CharacterRange(last, Utf16::kMaxCodeUnit)); | 4144 ranges->Add(CharacterRange(last, Utf16::kMaxCodeUnit)); |
4306 } | 4145 } |
4307 | 4146 |
4308 | |
4309 void CharacterRange::AddClassEscape(uint16_t type, | 4147 void CharacterRange::AddClassEscape(uint16_t type, |
4310 ZoneGrowableArray<CharacterRange>* ranges) { | 4148 ZoneGrowableArray<CharacterRange>* ranges) { |
4311 switch (type) { | 4149 switch (type) { |
4312 case 's': | 4150 case 's': |
4313 AddClass(kSpaceRanges, kSpaceRangeCount, ranges); | 4151 AddClass(kSpaceRanges, kSpaceRangeCount, ranges); |
4314 break; | 4152 break; |
4315 case 'S': | 4153 case 'S': |
4316 AddClassNegated(kSpaceRanges, kSpaceRangeCount, ranges); | 4154 AddClassNegated(kSpaceRanges, kSpaceRangeCount, ranges); |
4317 break; | 4155 break; |
4318 case 'w': | 4156 case 'w': |
(...skipping 20 matching lines...) Expand all Loading... |
4339 // This is the set of characters matched by the $ and ^ symbols | 4177 // This is the set of characters matched by the $ and ^ symbols |
4340 // in multiline mode. | 4178 // in multiline mode. |
4341 case 'n': | 4179 case 'n': |
4342 AddClass(kLineTerminatorRanges, kLineTerminatorRangeCount, ranges); | 4180 AddClass(kLineTerminatorRanges, kLineTerminatorRangeCount, ranges); |
4343 break; | 4181 break; |
4344 default: | 4182 default: |
4345 UNREACHABLE(); | 4183 UNREACHABLE(); |
4346 } | 4184 } |
4347 } | 4185 } |
4348 | 4186 |
4349 | |
4350 void CharacterRange::AddCaseEquivalents( | 4187 void CharacterRange::AddCaseEquivalents( |
4351 ZoneGrowableArray<CharacterRange>* ranges, | 4188 ZoneGrowableArray<CharacterRange>* ranges, |
4352 bool is_one_byte, | 4189 bool is_one_byte, |
4353 Zone* zone) { | 4190 Zone* zone) { |
4354 uint16_t bottom = from(); | 4191 uint16_t bottom = from(); |
4355 uint16_t top = to(); | 4192 uint16_t top = to(); |
4356 if (is_one_byte && !RangeContainsLatin1Equivalents(*this)) { | 4193 if (is_one_byte && !RangeContainsLatin1Equivalents(*this)) { |
4357 if (bottom > Symbols::kMaxOneCharCodeSymbol) return; | 4194 if (bottom > Symbols::kMaxOneCharCodeSymbol) return; |
4358 if (top > Symbols::kMaxOneCharCodeSymbol) { | 4195 if (top > Symbols::kMaxOneCharCodeSymbol) { |
4359 top = Symbols::kMaxOneCharCodeSymbol; | 4196 top = Symbols::kMaxOneCharCodeSymbol; |
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4411 uint16_t range_to = c - (block_end - end); | 4248 uint16_t range_to = c - (block_end - end); |
4412 if (!(bottom <= range_from && range_to <= top)) { | 4249 if (!(bottom <= range_from && range_to <= top)) { |
4413 ranges->Add(CharacterRange(range_from, range_to)); | 4250 ranges->Add(CharacterRange(range_from, range_to)); |
4414 } | 4251 } |
4415 } | 4252 } |
4416 pos = end + 1; | 4253 pos = end + 1; |
4417 } | 4254 } |
4418 } | 4255 } |
4419 } | 4256 } |
4420 | 4257 |
4421 | |
4422 bool CharacterRange::IsCanonical(ZoneGrowableArray<CharacterRange>* ranges) { | 4258 bool CharacterRange::IsCanonical(ZoneGrowableArray<CharacterRange>* ranges) { |
4423 ASSERT(ranges != NULL); | 4259 ASSERT(ranges != NULL); |
4424 intptr_t n = ranges->length(); | 4260 intptr_t n = ranges->length(); |
4425 if (n <= 1) return true; | 4261 if (n <= 1) return true; |
4426 intptr_t max = ranges->At(0).to(); | 4262 intptr_t max = ranges->At(0).to(); |
4427 for (intptr_t i = 1; i < n; i++) { | 4263 for (intptr_t i = 1; i < n; i++) { |
4428 CharacterRange next_range = ranges->At(i); | 4264 CharacterRange next_range = ranges->At(i); |
4429 if (next_range.from() <= max + 1) return false; | 4265 if (next_range.from() <= max + 1) return false; |
4430 max = next_range.to(); | 4266 max = next_range.to(); |
4431 } | 4267 } |
4432 return true; | 4268 return true; |
4433 } | 4269 } |
4434 | 4270 |
4435 | |
4436 ZoneGrowableArray<CharacterRange>* CharacterSet::ranges() { | 4271 ZoneGrowableArray<CharacterRange>* CharacterSet::ranges() { |
4437 if (ranges_ == NULL) { | 4272 if (ranges_ == NULL) { |
4438 ranges_ = new ZoneGrowableArray<CharacterRange>(2); | 4273 ranges_ = new ZoneGrowableArray<CharacterRange>(2); |
4439 CharacterRange::AddClassEscape(standard_set_type_, ranges_); | 4274 CharacterRange::AddClassEscape(standard_set_type_, ranges_); |
4440 } | 4275 } |
4441 return ranges_; | 4276 return ranges_; |
4442 } | 4277 } |
4443 | 4278 |
4444 | |
4445 // Move a number of elements in a zone array to another position | 4279 // Move a number of elements in a zone array to another position |
4446 // in the same array. Handles overlapping source and target areas. | 4280 // in the same array. Handles overlapping source and target areas. |
4447 static void MoveRanges(ZoneGrowableArray<CharacterRange>* list, | 4281 static void MoveRanges(ZoneGrowableArray<CharacterRange>* list, |
4448 intptr_t from, | 4282 intptr_t from, |
4449 intptr_t to, | 4283 intptr_t to, |
4450 intptr_t count) { | 4284 intptr_t count) { |
4451 // Ranges are potentially overlapping. | 4285 // Ranges are potentially overlapping. |
4452 if (from < to) { | 4286 if (from < to) { |
4453 for (intptr_t i = count - 1; i >= 0; i--) { | 4287 for (intptr_t i = count - 1; i >= 0; i--) { |
4454 (*list)[to + i] = list->At(from + i); | 4288 (*list)[to + i] = list->At(from + i); |
4455 } | 4289 } |
4456 } else { | 4290 } else { |
4457 for (intptr_t i = 0; i < count; i++) { | 4291 for (intptr_t i = 0; i < count; i++) { |
4458 (*list)[to + i] = list->At(from + i); | 4292 (*list)[to + i] = list->At(from + i); |
4459 } | 4293 } |
4460 } | 4294 } |
4461 } | 4295 } |
4462 | 4296 |
4463 | |
4464 static intptr_t InsertRangeInCanonicalList( | 4297 static intptr_t InsertRangeInCanonicalList( |
4465 ZoneGrowableArray<CharacterRange>* list, | 4298 ZoneGrowableArray<CharacterRange>* list, |
4466 intptr_t count, | 4299 intptr_t count, |
4467 CharacterRange insert) { | 4300 CharacterRange insert) { |
4468 // Inserts a range into list[0..count[, which must be sorted | 4301 // Inserts a range into list[0..count[, which must be sorted |
4469 // by from value and non-overlapping and non-adjacent, using at most | 4302 // by from value and non-overlapping and non-adjacent, using at most |
4470 // list[0..count] for the result. Returns the number of resulting | 4303 // list[0..count] for the result. Returns the number of resulting |
4471 // canonicalized ranges. Inserting a range may collapse existing ranges into | 4304 // canonicalized ranges. Inserting a range may collapse existing ranges into |
4472 // fewer ranges, so the return value can be anything in the range 1..count+1. | 4305 // fewer ranges, so the return value can be anything in the range 1..count+1. |
4473 uint16_t from = insert.from(); | 4306 uint16_t from = insert.from(); |
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4512 | 4345 |
4513 intptr_t new_from = Utils::Minimum(list->At(start_pos).from(), from); | 4346 intptr_t new_from = Utils::Minimum(list->At(start_pos).from(), from); |
4514 intptr_t new_to = Utils::Maximum(list->At(end_pos - 1).to(), to); | 4347 intptr_t new_to = Utils::Maximum(list->At(end_pos - 1).to(), to); |
4515 if (end_pos < count) { | 4348 if (end_pos < count) { |
4516 MoveRanges(list, end_pos, start_pos + 1, count - end_pos); | 4349 MoveRanges(list, end_pos, start_pos + 1, count - end_pos); |
4517 } | 4350 } |
4518 (*list)[start_pos] = CharacterRange(new_from, new_to); | 4351 (*list)[start_pos] = CharacterRange(new_from, new_to); |
4519 return count - (end_pos - start_pos) + 1; | 4352 return count - (end_pos - start_pos) + 1; |
4520 } | 4353 } |
4521 | 4354 |
4522 | |
4523 void CharacterSet::Canonicalize() { | 4355 void CharacterSet::Canonicalize() { |
4524 // Special/default classes are always considered canonical. The result | 4356 // Special/default classes are always considered canonical. The result |
4525 // of calling ranges() will be sorted. | 4357 // of calling ranges() will be sorted. |
4526 if (ranges_ == NULL) return; | 4358 if (ranges_ == NULL) return; |
4527 CharacterRange::Canonicalize(ranges_); | 4359 CharacterRange::Canonicalize(ranges_); |
4528 } | 4360 } |
4529 | 4361 |
4530 | |
4531 void CharacterRange::Canonicalize( | 4362 void CharacterRange::Canonicalize( |
4532 ZoneGrowableArray<CharacterRange>* character_ranges) { | 4363 ZoneGrowableArray<CharacterRange>* character_ranges) { |
4533 if (character_ranges->length() <= 1) return; | 4364 if (character_ranges->length() <= 1) return; |
4534 // Check whether ranges are already canonical (increasing, non-overlapping, | 4365 // Check whether ranges are already canonical (increasing, non-overlapping, |
4535 // non-adjacent). | 4366 // non-adjacent). |
4536 intptr_t n = character_ranges->length(); | 4367 intptr_t n = character_ranges->length(); |
4537 intptr_t max = character_ranges->At(0).to(); | 4368 intptr_t max = character_ranges->At(0).to(); |
4538 intptr_t i = 1; | 4369 intptr_t i = 1; |
4539 while (i < n) { | 4370 while (i < n) { |
4540 CharacterRange current = character_ranges->At(i); | 4371 CharacterRange current = character_ranges->At(i); |
(...skipping 16 matching lines...) Expand all Loading... |
4557 do { | 4388 do { |
4558 num_canonical = InsertRangeInCanonicalList(character_ranges, num_canonical, | 4389 num_canonical = InsertRangeInCanonicalList(character_ranges, num_canonical, |
4559 character_ranges->At(read)); | 4390 character_ranges->At(read)); |
4560 read++; | 4391 read++; |
4561 } while (read < n); | 4392 } while (read < n); |
4562 character_ranges->TruncateTo(num_canonical); | 4393 character_ranges->TruncateTo(num_canonical); |
4563 | 4394 |
4564 ASSERT(CharacterRange::IsCanonical(character_ranges)); | 4395 ASSERT(CharacterRange::IsCanonical(character_ranges)); |
4565 } | 4396 } |
4566 | 4397 |
4567 | |
4568 void CharacterRange::Negate(ZoneGrowableArray<CharacterRange>* ranges, | 4398 void CharacterRange::Negate(ZoneGrowableArray<CharacterRange>* ranges, |
4569 ZoneGrowableArray<CharacterRange>* negated_ranges) { | 4399 ZoneGrowableArray<CharacterRange>* negated_ranges) { |
4570 ASSERT(CharacterRange::IsCanonical(ranges)); | 4400 ASSERT(CharacterRange::IsCanonical(ranges)); |
4571 ASSERT(negated_ranges->length() == 0); | 4401 ASSERT(negated_ranges->length() == 0); |
4572 intptr_t range_count = ranges->length(); | 4402 intptr_t range_count = ranges->length(); |
4573 uint16_t from = 0; | 4403 uint16_t from = 0; |
4574 intptr_t i = 0; | 4404 intptr_t i = 0; |
4575 if (range_count > 0 && ranges->At(0).from() == 0) { | 4405 if (range_count > 0 && ranges->At(0).from() == 0) { |
4576 from = ranges->At(0).to(); | 4406 from = ranges->At(0).to(); |
4577 i = 1; | 4407 i = 1; |
4578 } | 4408 } |
4579 while (i < range_count) { | 4409 while (i < range_count) { |
4580 CharacterRange range = ranges->At(i); | 4410 CharacterRange range = ranges->At(i); |
4581 negated_ranges->Add(CharacterRange(from + 1, range.from() - 1)); | 4411 negated_ranges->Add(CharacterRange(from + 1, range.from() - 1)); |
4582 from = range.to(); | 4412 from = range.to(); |
4583 i++; | 4413 i++; |
4584 } | 4414 } |
4585 if (from < Utf16::kMaxCodeUnit) { | 4415 if (from < Utf16::kMaxCodeUnit) { |
4586 negated_ranges->Add(CharacterRange(from + 1, Utf16::kMaxCodeUnit)); | 4416 negated_ranges->Add(CharacterRange(from + 1, Utf16::kMaxCodeUnit)); |
4587 } | 4417 } |
4588 } | 4418 } |
4589 | 4419 |
4590 | |
4591 // ------------------------------------------------------------------- | 4420 // ------------------------------------------------------------------- |
4592 // Splay tree | 4421 // Splay tree |
4593 | 4422 |
4594 | |
4595 // Workaround for the fact that ZoneGrowableArray does not have contains(). | 4423 // Workaround for the fact that ZoneGrowableArray does not have contains(). |
4596 static bool ArrayContains(ZoneGrowableArray<unsigned>* array, unsigned value) { | 4424 static bool ArrayContains(ZoneGrowableArray<unsigned>* array, unsigned value) { |
4597 for (intptr_t i = 0; i < array->length(); i++) { | 4425 for (intptr_t i = 0; i < array->length(); i++) { |
4598 if (array->At(i) == value) { | 4426 if (array->At(i) == value) { |
4599 return true; | 4427 return true; |
4600 } | 4428 } |
4601 } | 4429 } |
4602 return false; | 4430 return false; |
4603 } | 4431 } |
4604 | 4432 |
4605 | |
4606 void OutSet::Set(unsigned value, Zone* zone) { | 4433 void OutSet::Set(unsigned value, Zone* zone) { |
4607 if (value < kFirstLimit) { | 4434 if (value < kFirstLimit) { |
4608 first_ |= (1 << value); | 4435 first_ |= (1 << value); |
4609 } else { | 4436 } else { |
4610 if (remaining_ == NULL) | 4437 if (remaining_ == NULL) |
4611 remaining_ = new (zone) ZoneGrowableArray<unsigned>(1); | 4438 remaining_ = new (zone) ZoneGrowableArray<unsigned>(1); |
4612 | 4439 |
4613 bool remaining_contains_value = ArrayContains(remaining_, value); | 4440 bool remaining_contains_value = ArrayContains(remaining_, value); |
4614 if (remaining_->is_empty() || !remaining_contains_value) { | 4441 if (remaining_->is_empty() || !remaining_contains_value) { |
4615 remaining_->Add(value); | 4442 remaining_->Add(value); |
4616 } | 4443 } |
4617 } | 4444 } |
4618 } | 4445 } |
4619 | 4446 |
4620 | |
4621 bool OutSet::Get(unsigned value) const { | 4447 bool OutSet::Get(unsigned value) const { |
4622 if (value < kFirstLimit) { | 4448 if (value < kFirstLimit) { |
4623 return (first_ & (1 << value)) != 0; | 4449 return (first_ & (1 << value)) != 0; |
4624 } else if (remaining_ == NULL) { | 4450 } else if (remaining_ == NULL) { |
4625 return false; | 4451 return false; |
4626 } else { | 4452 } else { |
4627 return ArrayContains(remaining_, value); | 4453 return ArrayContains(remaining_, value); |
4628 } | 4454 } |
4629 } | 4455 } |
4630 | 4456 |
4631 | |
4632 // ------------------------------------------------------------------- | 4457 // ------------------------------------------------------------------- |
4633 // Analysis | 4458 // Analysis |
4634 | 4459 |
4635 | |
4636 void Analysis::EnsureAnalyzed(RegExpNode* that) { | 4460 void Analysis::EnsureAnalyzed(RegExpNode* that) { |
4637 if (that->info()->been_analyzed || that->info()->being_analyzed) return; | 4461 if (that->info()->been_analyzed || that->info()->being_analyzed) return; |
4638 that->info()->being_analyzed = true; | 4462 that->info()->being_analyzed = true; |
4639 that->Accept(this); | 4463 that->Accept(this); |
4640 that->info()->being_analyzed = false; | 4464 that->info()->being_analyzed = false; |
4641 that->info()->been_analyzed = true; | 4465 that->info()->been_analyzed = true; |
4642 } | 4466 } |
4643 | 4467 |
4644 | |
4645 void Analysis::VisitEnd(EndNode* that) { | 4468 void Analysis::VisitEnd(EndNode* that) { |
4646 // nothing to do | 4469 // nothing to do |
4647 } | 4470 } |
4648 | 4471 |
4649 | |
4650 void TextNode::CalculateOffsets() { | 4472 void TextNode::CalculateOffsets() { |
4651 intptr_t element_count = elements()->length(); | 4473 intptr_t element_count = elements()->length(); |
4652 // Set up the offsets of the elements relative to the start. This is a fixed | 4474 // Set up the offsets of the elements relative to the start. This is a fixed |
4653 // quantity since a TextNode can only contain fixed-width things. | 4475 // quantity since a TextNode can only contain fixed-width things. |
4654 intptr_t cp_offset = 0; | 4476 intptr_t cp_offset = 0; |
4655 for (intptr_t i = 0; i < element_count; i++) { | 4477 for (intptr_t i = 0; i < element_count; i++) { |
4656 TextElement& elm = (*elements())[i]; | 4478 TextElement& elm = (*elements())[i]; |
4657 elm.set_cp_offset(cp_offset); | 4479 elm.set_cp_offset(cp_offset); |
4658 cp_offset += elm.length(); | 4480 cp_offset += elm.length(); |
4659 } | 4481 } |
4660 } | 4482 } |
4661 | 4483 |
4662 | |
4663 void Analysis::VisitText(TextNode* that) { | 4484 void Analysis::VisitText(TextNode* that) { |
4664 if (ignore_case_) { | 4485 if (ignore_case_) { |
4665 that->MakeCaseIndependent(is_one_byte_); | 4486 that->MakeCaseIndependent(is_one_byte_); |
4666 } | 4487 } |
4667 EnsureAnalyzed(that->on_success()); | 4488 EnsureAnalyzed(that->on_success()); |
4668 if (!has_failed()) { | 4489 if (!has_failed()) { |
4669 that->CalculateOffsets(); | 4490 that->CalculateOffsets(); |
4670 } | 4491 } |
4671 } | 4492 } |
4672 | 4493 |
4673 | |
4674 void Analysis::VisitAction(ActionNode* that) { | 4494 void Analysis::VisitAction(ActionNode* that) { |
4675 RegExpNode* target = that->on_success(); | 4495 RegExpNode* target = that->on_success(); |
4676 EnsureAnalyzed(target); | 4496 EnsureAnalyzed(target); |
4677 if (!has_failed()) { | 4497 if (!has_failed()) { |
4678 // If the next node is interested in what it follows then this node | 4498 // If the next node is interested in what it follows then this node |
4679 // has to be interested too so it can pass the information on. | 4499 // has to be interested too so it can pass the information on. |
4680 that->info()->AddFromFollowing(target->info()); | 4500 that->info()->AddFromFollowing(target->info()); |
4681 } | 4501 } |
4682 } | 4502 } |
4683 | 4503 |
4684 | |
4685 void Analysis::VisitChoice(ChoiceNode* that) { | 4504 void Analysis::VisitChoice(ChoiceNode* that) { |
4686 NodeInfo* info = that->info(); | 4505 NodeInfo* info = that->info(); |
4687 for (intptr_t i = 0; i < that->alternatives()->length(); i++) { | 4506 for (intptr_t i = 0; i < that->alternatives()->length(); i++) { |
4688 RegExpNode* node = (*that->alternatives())[i].node(); | 4507 RegExpNode* node = (*that->alternatives())[i].node(); |
4689 EnsureAnalyzed(node); | 4508 EnsureAnalyzed(node); |
4690 if (has_failed()) return; | 4509 if (has_failed()) return; |
4691 // Anything the following nodes need to know has to be known by | 4510 // Anything the following nodes need to know has to be known by |
4692 // this node also, so it can pass it on. | 4511 // this node also, so it can pass it on. |
4693 info->AddFromFollowing(node->info()); | 4512 info->AddFromFollowing(node->info()); |
4694 } | 4513 } |
4695 } | 4514 } |
4696 | 4515 |
4697 | |
4698 void Analysis::VisitLoopChoice(LoopChoiceNode* that) { | 4516 void Analysis::VisitLoopChoice(LoopChoiceNode* that) { |
4699 NodeInfo* info = that->info(); | 4517 NodeInfo* info = that->info(); |
4700 for (intptr_t i = 0; i < that->alternatives()->length(); i++) { | 4518 for (intptr_t i = 0; i < that->alternatives()->length(); i++) { |
4701 RegExpNode* node = (*that->alternatives())[i].node(); | 4519 RegExpNode* node = (*that->alternatives())[i].node(); |
4702 if (node != that->loop_node()) { | 4520 if (node != that->loop_node()) { |
4703 EnsureAnalyzed(node); | 4521 EnsureAnalyzed(node); |
4704 if (has_failed()) return; | 4522 if (has_failed()) return; |
4705 info->AddFromFollowing(node->info()); | 4523 info->AddFromFollowing(node->info()); |
4706 } | 4524 } |
4707 } | 4525 } |
4708 // Check the loop last since it may need the value of this node | 4526 // Check the loop last since it may need the value of this node |
4709 // to get a correct result. | 4527 // to get a correct result. |
4710 EnsureAnalyzed(that->loop_node()); | 4528 EnsureAnalyzed(that->loop_node()); |
4711 if (!has_failed()) { | 4529 if (!has_failed()) { |
4712 info->AddFromFollowing(that->loop_node()->info()); | 4530 info->AddFromFollowing(that->loop_node()->info()); |
4713 } | 4531 } |
4714 } | 4532 } |
4715 | 4533 |
4716 | |
4717 void Analysis::VisitBackReference(BackReferenceNode* that) { | 4534 void Analysis::VisitBackReference(BackReferenceNode* that) { |
4718 EnsureAnalyzed(that->on_success()); | 4535 EnsureAnalyzed(that->on_success()); |
4719 } | 4536 } |
4720 | 4537 |
4721 | |
4722 void Analysis::VisitAssertion(AssertionNode* that) { | 4538 void Analysis::VisitAssertion(AssertionNode* that) { |
4723 EnsureAnalyzed(that->on_success()); | 4539 EnsureAnalyzed(that->on_success()); |
4724 } | 4540 } |
4725 | 4541 |
4726 | |
4727 void BackReferenceNode::FillInBMInfo(intptr_t offset, | 4542 void BackReferenceNode::FillInBMInfo(intptr_t offset, |
4728 intptr_t budget, | 4543 intptr_t budget, |
4729 BoyerMooreLookahead* bm, | 4544 BoyerMooreLookahead* bm, |
4730 bool not_at_start) { | 4545 bool not_at_start) { |
4731 // Working out the set of characters that a backreference can match is too | 4546 // Working out the set of characters that a backreference can match is too |
4732 // hard, so we just say that any character can match. | 4547 // hard, so we just say that any character can match. |
4733 bm->SetRest(offset); | 4548 bm->SetRest(offset); |
4734 SaveBMInfo(bm, not_at_start, offset); | 4549 SaveBMInfo(bm, not_at_start, offset); |
4735 } | 4550 } |
4736 | 4551 |
4737 | |
4738 COMPILE_ASSERT(BoyerMoorePositionInfo::kMapSize == | 4552 COMPILE_ASSERT(BoyerMoorePositionInfo::kMapSize == |
4739 RegExpMacroAssembler::kTableSize); | 4553 RegExpMacroAssembler::kTableSize); |
4740 | 4554 |
4741 | |
4742 void ChoiceNode::FillInBMInfo(intptr_t offset, | 4555 void ChoiceNode::FillInBMInfo(intptr_t offset, |
4743 intptr_t budget, | 4556 intptr_t budget, |
4744 BoyerMooreLookahead* bm, | 4557 BoyerMooreLookahead* bm, |
4745 bool not_at_start) { | 4558 bool not_at_start) { |
4746 ZoneGrowableArray<GuardedAlternative>* alts = alternatives(); | 4559 ZoneGrowableArray<GuardedAlternative>* alts = alternatives(); |
4747 budget = (budget - 1) / alts->length(); | 4560 budget = (budget - 1) / alts->length(); |
4748 for (intptr_t i = 0; i < alts->length(); i++) { | 4561 for (intptr_t i = 0; i < alts->length(); i++) { |
4749 GuardedAlternative& alt = (*alts)[i]; | 4562 GuardedAlternative& alt = (*alts)[i]; |
4750 if (alt.guards() != NULL && alt.guards()->length() != 0) { | 4563 if (alt.guards() != NULL && alt.guards()->length() != 0) { |
4751 bm->SetRest(offset); // Give up trying to fill in info. | 4564 bm->SetRest(offset); // Give up trying to fill in info. |
4752 SaveBMInfo(bm, not_at_start, offset); | 4565 SaveBMInfo(bm, not_at_start, offset); |
4753 return; | 4566 return; |
4754 } | 4567 } |
4755 alt.node()->FillInBMInfo(offset, budget, bm, not_at_start); | 4568 alt.node()->FillInBMInfo(offset, budget, bm, not_at_start); |
4756 } | 4569 } |
4757 SaveBMInfo(bm, not_at_start, offset); | 4570 SaveBMInfo(bm, not_at_start, offset); |
4758 } | 4571 } |
4759 | 4572 |
4760 | |
4761 void TextNode::FillInBMInfo(intptr_t initial_offset, | 4573 void TextNode::FillInBMInfo(intptr_t initial_offset, |
4762 intptr_t budget, | 4574 intptr_t budget, |
4763 BoyerMooreLookahead* bm, | 4575 BoyerMooreLookahead* bm, |
4764 bool not_at_start) { | 4576 bool not_at_start) { |
4765 if (initial_offset >= bm->length()) return; | 4577 if (initial_offset >= bm->length()) return; |
4766 intptr_t offset = initial_offset; | 4578 intptr_t offset = initial_offset; |
4767 intptr_t max_char = bm->max_char(); | 4579 intptr_t max_char = bm->max_char(); |
4768 for (intptr_t i = 0; i < elements()->length(); i++) { | 4580 for (intptr_t i = 0; i < elements()->length(); i++) { |
4769 if (offset >= bm->length()) { | 4581 if (offset >= bm->length()) { |
4770 if (initial_offset == 0) set_bm_info(not_at_start, bm); | 4582 if (initial_offset == 0) set_bm_info(not_at_start, bm); |
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4811 } | 4623 } |
4812 if (offset >= bm->length()) { | 4624 if (offset >= bm->length()) { |
4813 if (initial_offset == 0) set_bm_info(not_at_start, bm); | 4625 if (initial_offset == 0) set_bm_info(not_at_start, bm); |
4814 return; | 4626 return; |
4815 } | 4627 } |
4816 on_success()->FillInBMInfo(offset, budget - 1, bm, | 4628 on_success()->FillInBMInfo(offset, budget - 1, bm, |
4817 true); // Not at start after a text node. | 4629 true); // Not at start after a text node. |
4818 if (initial_offset == 0) set_bm_info(not_at_start, bm); | 4630 if (initial_offset == 0) set_bm_info(not_at_start, bm); |
4819 } | 4631 } |
4820 | 4632 |
4821 | |
4822 #if !defined(DART_PRECOMPILED_RUNTIME) | 4633 #if !defined(DART_PRECOMPILED_RUNTIME) |
4823 RegExpEngine::CompilationResult RegExpEngine::CompileIR( | 4634 RegExpEngine::CompilationResult RegExpEngine::CompileIR( |
4824 RegExpCompileData* data, | 4635 RegExpCompileData* data, |
4825 const ParsedFunction* parsed_function, | 4636 const ParsedFunction* parsed_function, |
4826 const ZoneGrowableArray<const ICData*>& ic_data_array, | 4637 const ZoneGrowableArray<const ICData*>& ic_data_array, |
4827 intptr_t osr_id) { | 4638 intptr_t osr_id) { |
4828 ASSERT(!FLAG_interpret_irregexp); | 4639 ASSERT(!FLAG_interpret_irregexp); |
4829 Zone* zone = Thread::Current()->zone(); | 4640 Zone* zone = Thread::Current()->zone(); |
4830 | 4641 |
4831 const Function& function = parsed_function->function(); | 4642 const Function& function = parsed_function->function(); |
(...skipping 91 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4923 compiler.Assemble(macro_assembler, node, data->capture_count, pattern); | 4734 compiler.Assemble(macro_assembler, node, data->capture_count, pattern); |
4924 | 4735 |
4925 if (FLAG_trace_irregexp) { | 4736 if (FLAG_trace_irregexp) { |
4926 macro_assembler->PrintBlocks(); | 4737 macro_assembler->PrintBlocks(); |
4927 } | 4738 } |
4928 | 4739 |
4929 return result; | 4740 return result; |
4930 } | 4741 } |
4931 #endif // !defined(DART_PRECOMPILED_RUNTIME) | 4742 #endif // !defined(DART_PRECOMPILED_RUNTIME) |
4932 | 4743 |
4933 | |
4934 RegExpEngine::CompilationResult RegExpEngine::CompileBytecode( | 4744 RegExpEngine::CompilationResult RegExpEngine::CompileBytecode( |
4935 RegExpCompileData* data, | 4745 RegExpCompileData* data, |
4936 const RegExp& regexp, | 4746 const RegExp& regexp, |
4937 bool is_one_byte, | 4747 bool is_one_byte, |
4938 bool is_sticky, | 4748 bool is_sticky, |
4939 Zone* zone) { | 4749 Zone* zone) { |
4940 ASSERT(FLAG_interpret_irregexp); | 4750 ASSERT(FLAG_interpret_irregexp); |
4941 const String& pattern = String::Handle(zone, regexp.pattern()); | 4751 const String& pattern = String::Handle(zone, regexp.pattern()); |
4942 | 4752 |
4943 ASSERT(!regexp.IsNull()); | 4753 ASSERT(!regexp.IsNull()); |
(...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5026 RegExpEngine::CompilationResult result = | 4836 RegExpEngine::CompilationResult result = |
5027 compiler.Assemble(macro_assembler, node, data->capture_count, pattern); | 4837 compiler.Assemble(macro_assembler, node, data->capture_count, pattern); |
5028 | 4838 |
5029 if (FLAG_trace_irregexp) { | 4839 if (FLAG_trace_irregexp) { |
5030 macro_assembler->PrintBlocks(); | 4840 macro_assembler->PrintBlocks(); |
5031 } | 4841 } |
5032 | 4842 |
5033 return result; | 4843 return result; |
5034 } | 4844 } |
5035 | 4845 |
5036 | |
5037 static void CreateSpecializedFunction(Thread* thread, | 4846 static void CreateSpecializedFunction(Thread* thread, |
5038 Zone* zone, | 4847 Zone* zone, |
5039 const RegExp& regexp, | 4848 const RegExp& regexp, |
5040 intptr_t specialization_cid, | 4849 intptr_t specialization_cid, |
5041 bool sticky, | 4850 bool sticky, |
5042 const Object& owner) { | 4851 const Object& owner) { |
5043 const intptr_t kParamCount = RegExpMacroAssembler::kParamCount; | 4852 const intptr_t kParamCount = RegExpMacroAssembler::kParamCount; |
5044 | 4853 |
5045 Function& fn = | 4854 Function& fn = |
5046 Function::Handle(zone, Function::New(Symbols::ColonMatcher(), | 4855 Function::Handle(zone, Function::New(Symbols::ColonMatcher(), |
(...skipping 27 matching lines...) Expand all Loading... |
5074 | 4883 |
5075 // Cache the result. | 4884 // Cache the result. |
5076 regexp.set_function(specialization_cid, sticky, fn); | 4885 regexp.set_function(specialization_cid, sticky, fn); |
5077 | 4886 |
5078 fn.SetRegExpData(regexp, specialization_cid, sticky); | 4887 fn.SetRegExpData(regexp, specialization_cid, sticky); |
5079 fn.set_is_debuggable(false); | 4888 fn.set_is_debuggable(false); |
5080 | 4889 |
5081 // The function is compiled lazily during the first call. | 4890 // The function is compiled lazily during the first call. |
5082 } | 4891 } |
5083 | 4892 |
5084 | |
5085 RawRegExp* RegExpEngine::CreateRegExp(Thread* thread, | 4893 RawRegExp* RegExpEngine::CreateRegExp(Thread* thread, |
5086 const String& pattern, | 4894 const String& pattern, |
5087 bool multi_line, | 4895 bool multi_line, |
5088 bool ignore_case) { | 4896 bool ignore_case) { |
5089 Zone* zone = thread->zone(); | 4897 Zone* zone = thread->zone(); |
5090 const RegExp& regexp = RegExp::Handle(RegExp::New()); | 4898 const RegExp& regexp = RegExp::Handle(RegExp::New()); |
5091 | 4899 |
5092 regexp.set_pattern(pattern); | 4900 regexp.set_pattern(pattern); |
5093 | 4901 |
5094 if (multi_line) { | 4902 if (multi_line) { |
(...skipping 18 matching lines...) Expand all Loading... |
5113 CreateSpecializedFunction(thread, zone, regexp, cid, /*sticky=*/false, | 4921 CreateSpecializedFunction(thread, zone, regexp, cid, /*sticky=*/false, |
5114 owner); | 4922 owner); |
5115 CreateSpecializedFunction(thread, zone, regexp, cid, /*sticky=*/true, | 4923 CreateSpecializedFunction(thread, zone, regexp, cid, /*sticky=*/true, |
5116 owner); | 4924 owner); |
5117 } | 4925 } |
5118 } | 4926 } |
5119 | 4927 |
5120 return regexp.raw(); | 4928 return regexp.raw(); |
5121 } | 4929 } |
5122 | 4930 |
5123 | |
5124 } // namespace dart | 4931 } // namespace dart |
OLD | NEW |