Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(955)

Side by Side Diff: runtime/vm/regexp.cc

Issue 2974233002: VM: Re-format to use at most one newline between functions (Closed)
Patch Set: Rebase and merge Created 3 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « runtime/vm/regexp.h ('k') | runtime/vm/regexp_assembler.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 #include "vm/regexp.h" 5 #include "vm/regexp.h"
6 6
7 #include "vm/dart_entry.h" 7 #include "vm/dart_entry.h"
8 #include "vm/regexp_assembler.h" 8 #include "vm/regexp_assembler.h"
9 #include "vm/regexp_assembler_bytecode.h" 9 #include "vm/regexp_assembler_bytecode.h"
10 #include "vm/regexp_assembler_ir.h" 10 #include "vm/regexp_assembler_ir.h"
11 #include "vm/regexp_ast.h" 11 #include "vm/regexp_ast.h"
12 #include "vm/symbols.h"
13 #include "vm/thread.h"
12 #include "vm/unibrow-inl.h" 14 #include "vm/unibrow-inl.h"
13 #include "vm/unicode.h" 15 #include "vm/unicode.h"
14 #include "vm/symbols.h"
15 #include "vm/thread.h"
16 16
17 #define Z (zone()) 17 #define Z (zone())
18 18
19 namespace dart { 19 namespace dart {
20 20
21 // Default to generating optimized regexp code. 21 // Default to generating optimized regexp code.
22 static const bool kRegexpOptimization = true; 22 static const bool kRegexpOptimization = true;
23 23
24 // More makes code generation slower, less makes V8 benchmark score lower. 24 // More makes code generation slower, less makes V8 benchmark score lower.
25 static const intptr_t kMaxLookaheadForBoyerMoore = 8; 25 static const intptr_t kMaxLookaheadForBoyerMoore = 8;
(...skipping 163 matching lines...) Expand 10 before | Expand all | Expand 10 after
189 // string. Therefore code generated for a non-trivial trace is specialized 189 // string. Therefore code generated for a non-trivial trace is specialized
190 // to that trace. The code generator therefore has the ability to generate 190 // to that trace. The code generator therefore has the ability to generate
191 // code for each node several times. In order to limit the size of the 191 // code for each node several times. In order to limit the size of the
192 // generated code there is an arbitrary limit on how many specialized sets of 192 // generated code there is an arbitrary limit on how many specialized sets of
193 // code may be generated for a given node. If the limit is reached, the 193 // code may be generated for a given node. If the limit is reached, the
194 // trace is flushed and a generic version of the code for a node is emitted. 194 // trace is flushed and a generic version of the code for a node is emitted.
195 // This is subsequently used for that node. The code emitted for non-generic 195 // This is subsequently used for that node. The code emitted for non-generic
196 // trace is not recorded in the node and so it cannot currently be reused in 196 // trace is not recorded in the node and so it cannot currently be reused in
197 // the event that code generation is requested for an identical trace. 197 // the event that code generation is requested for an identical trace.
198 198
199
200 void RegExpTree::AppendToText(RegExpText* text) { 199 void RegExpTree::AppendToText(RegExpText* text) {
201 UNREACHABLE(); 200 UNREACHABLE();
202 } 201 }
203 202
204
205 void RegExpAtom::AppendToText(RegExpText* text) { 203 void RegExpAtom::AppendToText(RegExpText* text) {
206 text->AddElement(TextElement::Atom(this)); 204 text->AddElement(TextElement::Atom(this));
207 } 205 }
208 206
209
210 void RegExpCharacterClass::AppendToText(RegExpText* text) { 207 void RegExpCharacterClass::AppendToText(RegExpText* text) {
211 text->AddElement(TextElement::CharClass(this)); 208 text->AddElement(TextElement::CharClass(this));
212 } 209 }
213 210
214
215 void RegExpText::AppendToText(RegExpText* text) { 211 void RegExpText::AppendToText(RegExpText* text) {
216 for (intptr_t i = 0; i < elements()->length(); i++) 212 for (intptr_t i = 0; i < elements()->length(); i++)
217 text->AddElement((*elements())[i]); 213 text->AddElement((*elements())[i]);
218 } 214 }
219 215
220
221 TextElement TextElement::Atom(RegExpAtom* atom) { 216 TextElement TextElement::Atom(RegExpAtom* atom) {
222 return TextElement(ATOM, atom); 217 return TextElement(ATOM, atom);
223 } 218 }
224 219
225
226 TextElement TextElement::CharClass(RegExpCharacterClass* char_class) { 220 TextElement TextElement::CharClass(RegExpCharacterClass* char_class) {
227 return TextElement(CHAR_CLASS, char_class); 221 return TextElement(CHAR_CLASS, char_class);
228 } 222 }
229 223
230
231 intptr_t TextElement::length() const { 224 intptr_t TextElement::length() const {
232 switch (text_type()) { 225 switch (text_type()) {
233 case ATOM: 226 case ATOM:
234 return atom()->length(); 227 return atom()->length();
235 228
236 case CHAR_CLASS: 229 case CHAR_CLASS:
237 return 1; 230 return 1;
238 } 231 }
239 UNREACHABLE(); 232 UNREACHABLE();
240 return 0; 233 return 0;
241 } 234 }
242 235
243
244 class FrequencyCollator : public ValueObject { 236 class FrequencyCollator : public ValueObject {
245 public: 237 public:
246 FrequencyCollator() : total_samples_(0) { 238 FrequencyCollator() : total_samples_(0) {
247 for (intptr_t i = 0; i < RegExpMacroAssembler::kTableSize; i++) { 239 for (intptr_t i = 0; i < RegExpMacroAssembler::kTableSize; i++) {
248 frequencies_[i] = CharacterFrequency(i); 240 frequencies_[i] = CharacterFrequency(i);
249 } 241 }
250 } 242 }
251 243
252 void CountCharacter(intptr_t character) { 244 void CountCharacter(intptr_t character) {
253 intptr_t index = (character & RegExpMacroAssembler::kTableMask); 245 intptr_t index = (character & RegExpMacroAssembler::kTableMask);
(...skipping 22 matching lines...) Expand all
276 intptr_t counter() { return counter_; } 268 intptr_t counter() { return counter_; }
277 intptr_t character() { return character_; } 269 intptr_t character() { return character_; }
278 270
279 private: 271 private:
280 intptr_t counter_; 272 intptr_t counter_;
281 intptr_t character_; 273 intptr_t character_;
282 274
283 DISALLOW_ALLOCATION(); 275 DISALLOW_ALLOCATION();
284 }; 276 };
285 277
286
287 private: 278 private:
288 CharacterFrequency frequencies_[RegExpMacroAssembler::kTableSize]; 279 CharacterFrequency frequencies_[RegExpMacroAssembler::kTableSize];
289 intptr_t total_samples_; 280 intptr_t total_samples_;
290 }; 281 };
291 282
292
293 class RegExpCompiler : public ValueObject { 283 class RegExpCompiler : public ValueObject {
294 public: 284 public:
295 RegExpCompiler(intptr_t capture_count, bool ignore_case, bool is_one_byte); 285 RegExpCompiler(intptr_t capture_count, bool ignore_case, bool is_one_byte);
296 286
297 intptr_t AllocateRegister() { return next_register_++; } 287 intptr_t AllocateRegister() { return next_register_++; }
298 288
299 #if !defined(DART_PRECOMPILED_RUNTIME) 289 #if !defined(DART_PRECOMPILED_RUNTIME)
300 RegExpEngine::CompilationResult Assemble(IRRegExpMacroAssembler* assembler, 290 RegExpEngine::CompilationResult Assemble(IRRegExpMacroAssembler* assembler,
301 RegExpNode* start, 291 RegExpNode* start,
302 intptr_t capture_count, 292 intptr_t capture_count,
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after
345 intptr_t recursion_depth_; 335 intptr_t recursion_depth_;
346 RegExpMacroAssembler* macro_assembler_; 336 RegExpMacroAssembler* macro_assembler_;
347 bool ignore_case_; 337 bool ignore_case_;
348 bool is_one_byte_; 338 bool is_one_byte_;
349 bool reg_exp_too_big_; 339 bool reg_exp_too_big_;
350 intptr_t current_expansion_factor_; 340 intptr_t current_expansion_factor_;
351 FrequencyCollator frequency_collator_; 341 FrequencyCollator frequency_collator_;
352 Zone* zone_; 342 Zone* zone_;
353 }; 343 };
354 344
355
356 class RecursionCheck : public ValueObject { 345 class RecursionCheck : public ValueObject {
357 public: 346 public:
358 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) { 347 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) {
359 compiler->IncrementRecursionDepth(); 348 compiler->IncrementRecursionDepth();
360 } 349 }
361 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); } 350 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); }
362 351
363 private: 352 private:
364 RegExpCompiler* compiler_; 353 RegExpCompiler* compiler_;
365 }; 354 };
366 355
367
368 static RegExpEngine::CompilationResult IrregexpRegExpTooBig() { 356 static RegExpEngine::CompilationResult IrregexpRegExpTooBig() {
369 return RegExpEngine::CompilationResult("RegExp too big"); 357 return RegExpEngine::CompilationResult("RegExp too big");
370 } 358 }
371 359
372
373 // Attempts to compile the regexp using an Irregexp code generator. Returns 360 // Attempts to compile the regexp using an Irregexp code generator. Returns
374 // a fixed array or a null handle depending on whether it succeeded. 361 // a fixed array or a null handle depending on whether it succeeded.
375 RegExpCompiler::RegExpCompiler(intptr_t capture_count, 362 RegExpCompiler::RegExpCompiler(intptr_t capture_count,
376 bool ignore_case, 363 bool ignore_case,
377 bool is_one_byte) 364 bool is_one_byte)
378 : next_register_(2 * (capture_count + 1)), 365 : next_register_(2 * (capture_count + 1)),
379 work_list_(NULL), 366 work_list_(NULL),
380 recursion_depth_(0), 367 recursion_depth_(0),
381 ignore_case_(ignore_case), 368 ignore_case_(ignore_case),
382 is_one_byte_(is_one_byte), 369 is_one_byte_(is_one_byte),
383 reg_exp_too_big_(false), 370 reg_exp_too_big_(false),
384 current_expansion_factor_(1), 371 current_expansion_factor_(1),
385 zone_(Thread::Current()->zone()) { 372 zone_(Thread::Current()->zone()) {
386 accept_ = new (Z) EndNode(EndNode::ACCEPT, Z); 373 accept_ = new (Z) EndNode(EndNode::ACCEPT, Z);
387 } 374 }
388 375
389
390 #if !defined(DART_PRECOMPILED_RUNTIME) 376 #if !defined(DART_PRECOMPILED_RUNTIME)
391 RegExpEngine::CompilationResult RegExpCompiler::Assemble( 377 RegExpEngine::CompilationResult RegExpCompiler::Assemble(
392 IRRegExpMacroAssembler* macro_assembler, 378 IRRegExpMacroAssembler* macro_assembler,
393 RegExpNode* start, 379 RegExpNode* start,
394 intptr_t capture_count, 380 intptr_t capture_count,
395 const String& pattern) { 381 const String& pattern) {
396 macro_assembler->set_slow_safe(false /* use_slow_safe_regexp_compiler */); 382 macro_assembler->set_slow_safe(false /* use_slow_safe_regexp_compiler */);
397 macro_assembler_ = macro_assembler; 383 macro_assembler_ = macro_assembler;
398 384
399 ZoneGrowableArray<RegExpNode*> work_list(0); 385 ZoneGrowableArray<RegExpNode*> work_list(0);
(...skipping 12 matching lines...) Expand all
412 macro_assembler->GenerateBacktrackBlock(); 398 macro_assembler->GenerateBacktrackBlock();
413 macro_assembler->FinalizeRegistersArray(); 399 macro_assembler->FinalizeRegistersArray();
414 400
415 return RegExpEngine::CompilationResult( 401 return RegExpEngine::CompilationResult(
416 macro_assembler->backtrack_goto(), macro_assembler->graph_entry(), 402 macro_assembler->backtrack_goto(), macro_assembler->graph_entry(),
417 macro_assembler->num_blocks(), macro_assembler->num_stack_locals(), 403 macro_assembler->num_blocks(), macro_assembler->num_stack_locals(),
418 next_register_); 404 next_register_);
419 } 405 }
420 #endif 406 #endif
421 407
422
423 RegExpEngine::CompilationResult RegExpCompiler::Assemble( 408 RegExpEngine::CompilationResult RegExpCompiler::Assemble(
424 BytecodeRegExpMacroAssembler* macro_assembler, 409 BytecodeRegExpMacroAssembler* macro_assembler,
425 RegExpNode* start, 410 RegExpNode* start,
426 intptr_t capture_count, 411 intptr_t capture_count,
427 const String& pattern) { 412 const String& pattern) {
428 macro_assembler->set_slow_safe(false /* use_slow_safe_regexp_compiler */); 413 macro_assembler->set_slow_safe(false /* use_slow_safe_regexp_compiler */);
429 macro_assembler_ = macro_assembler; 414 macro_assembler_ = macro_assembler;
430 415
431 ZoneGrowableArray<RegExpNode*> work_list(0); 416 ZoneGrowableArray<RegExpNode*> work_list(0);
432 work_list_ = &work_list; 417 work_list_ = &work_list;
433 BlockLabel fail; 418 BlockLabel fail;
434 macro_assembler_->PushBacktrack(&fail); 419 macro_assembler_->PushBacktrack(&fail);
435 Trace new_trace; 420 Trace new_trace;
436 start->Emit(this, &new_trace); 421 start->Emit(this, &new_trace);
437 macro_assembler_->BindBlock(&fail); 422 macro_assembler_->BindBlock(&fail);
438 macro_assembler_->Fail(); 423 macro_assembler_->Fail();
439 while (!work_list.is_empty()) { 424 while (!work_list.is_empty()) {
440 work_list.RemoveLast()->Emit(this, &new_trace); 425 work_list.RemoveLast()->Emit(this, &new_trace);
441 } 426 }
442 if (reg_exp_too_big_) return IrregexpRegExpTooBig(); 427 if (reg_exp_too_big_) return IrregexpRegExpTooBig();
443 428
444 TypedData& bytecode = TypedData::ZoneHandle(macro_assembler->GetBytecode()); 429 TypedData& bytecode = TypedData::ZoneHandle(macro_assembler->GetBytecode());
445 return RegExpEngine::CompilationResult(&bytecode, next_register_); 430 return RegExpEngine::CompilationResult(&bytecode, next_register_);
446 } 431 }
447 432
448
449 bool Trace::DeferredAction::Mentions(intptr_t that) { 433 bool Trace::DeferredAction::Mentions(intptr_t that) {
450 if (action_type() == ActionNode::CLEAR_CAPTURES) { 434 if (action_type() == ActionNode::CLEAR_CAPTURES) {
451 Interval range = static_cast<DeferredClearCaptures*>(this)->range(); 435 Interval range = static_cast<DeferredClearCaptures*>(this)->range();
452 return range.Contains(that); 436 return range.Contains(that);
453 } else { 437 } else {
454 return reg() == that; 438 return reg() == that;
455 } 439 }
456 } 440 }
457 441
458
459 bool Trace::mentions_reg(intptr_t reg) { 442 bool Trace::mentions_reg(intptr_t reg) {
460 for (DeferredAction* action = actions_; action != NULL; 443 for (DeferredAction* action = actions_; action != NULL;
461 action = action->next()) { 444 action = action->next()) {
462 if (action->Mentions(reg)) return true; 445 if (action->Mentions(reg)) return true;
463 } 446 }
464 return false; 447 return false;
465 } 448 }
466 449
467
468 bool Trace::GetStoredPosition(intptr_t reg, intptr_t* cp_offset) { 450 bool Trace::GetStoredPosition(intptr_t reg, intptr_t* cp_offset) {
469 ASSERT(*cp_offset == 0); 451 ASSERT(*cp_offset == 0);
470 for (DeferredAction* action = actions_; action != NULL; 452 for (DeferredAction* action = actions_; action != NULL;
471 action = action->next()) { 453 action = action->next()) {
472 if (action->Mentions(reg)) { 454 if (action->Mentions(reg)) {
473 if (action->action_type() == ActionNode::STORE_POSITION) { 455 if (action->action_type() == ActionNode::STORE_POSITION) {
474 *cp_offset = static_cast<DeferredCapture*>(action)->cp_offset(); 456 *cp_offset = static_cast<DeferredCapture*>(action)->cp_offset();
475 return true; 457 return true;
476 } else { 458 } else {
477 return false; 459 return false;
478 } 460 }
479 } 461 }
480 } 462 }
481 return false; 463 return false;
482 } 464 }
483 465
484
485 // This is called as we come into a loop choice node and some other tricky 466 // This is called as we come into a loop choice node and some other tricky
486 // nodes. It normalizes the state of the code generator to ensure we can 467 // nodes. It normalizes the state of the code generator to ensure we can
487 // generate generic code. 468 // generate generic code.
488 intptr_t Trace::FindAffectedRegisters(OutSet* affected_registers, Zone* zone) { 469 intptr_t Trace::FindAffectedRegisters(OutSet* affected_registers, Zone* zone) {
489 intptr_t max_register = RegExpCompiler::kNoRegister; 470 intptr_t max_register = RegExpCompiler::kNoRegister;
490 for (DeferredAction* action = actions_; action != NULL; 471 for (DeferredAction* action = actions_; action != NULL;
491 action = action->next()) { 472 action = action->next()) {
492 if (action->action_type() == ActionNode::CLEAR_CAPTURES) { 473 if (action->action_type() == ActionNode::CLEAR_CAPTURES) {
493 Interval range = static_cast<DeferredClearCaptures*>(action)->range(); 474 Interval range = static_cast<DeferredClearCaptures*>(action)->range();
494 for (intptr_t i = range.from(); i <= range.to(); i++) 475 for (intptr_t i = range.from(); i <= range.to(); i++)
495 affected_registers->Set(i, zone); 476 affected_registers->Set(i, zone);
496 if (range.to() > max_register) max_register = range.to(); 477 if (range.to() > max_register) max_register = range.to();
497 } else { 478 } else {
498 affected_registers->Set(action->reg(), zone); 479 affected_registers->Set(action->reg(), zone);
499 if (action->reg() > max_register) max_register = action->reg(); 480 if (action->reg() > max_register) max_register = action->reg();
500 } 481 }
501 } 482 }
502 return max_register; 483 return max_register;
503 } 484 }
504 485
505
506 void Trace::RestoreAffectedRegisters(RegExpMacroAssembler* assembler, 486 void Trace::RestoreAffectedRegisters(RegExpMacroAssembler* assembler,
507 intptr_t max_register, 487 intptr_t max_register,
508 const OutSet& registers_to_pop, 488 const OutSet& registers_to_pop,
509 const OutSet& registers_to_clear) { 489 const OutSet& registers_to_clear) {
510 for (intptr_t reg = max_register; reg >= 0; reg--) { 490 for (intptr_t reg = max_register; reg >= 0; reg--) {
511 if (registers_to_pop.Get(reg)) { 491 if (registers_to_pop.Get(reg)) {
512 assembler->PopRegister(reg); 492 assembler->PopRegister(reg);
513 } else if (registers_to_clear.Get(reg)) { 493 } else if (registers_to_clear.Get(reg)) {
514 intptr_t clear_to = reg; 494 intptr_t clear_to = reg;
515 while (reg > 0 && registers_to_clear.Get(reg - 1)) { 495 while (reg > 0 && registers_to_clear.Get(reg - 1)) {
516 reg--; 496 reg--;
517 } 497 }
518 assembler->ClearRegisters(reg, clear_to); 498 assembler->ClearRegisters(reg, clear_to);
519 } 499 }
520 } 500 }
521 } 501 }
522 502
523
524 void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler, 503 void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler,
525 intptr_t max_register, 504 intptr_t max_register,
526 const OutSet& affected_registers, 505 const OutSet& affected_registers,
527 OutSet* registers_to_pop, 506 OutSet* registers_to_pop,
528 OutSet* registers_to_clear, 507 OutSet* registers_to_clear,
529 Zone* zone) { 508 Zone* zone) {
530 for (intptr_t reg = 0; reg <= max_register; reg++) { 509 for (intptr_t reg = 0; reg <= max_register; reg++) {
531 if (!affected_registers.Get(reg)) { 510 if (!affected_registers.Get(reg)) {
532 continue; 511 continue;
533 } 512 }
(...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after
628 } else if (clear) { 607 } else if (clear) {
629 assembler->ClearRegisters(reg, reg); 608 assembler->ClearRegisters(reg, reg);
630 } else if (absolute) { 609 } else if (absolute) {
631 assembler->SetRegister(reg, value); 610 assembler->SetRegister(reg, value);
632 } else if (value != 0) { 611 } else if (value != 0) {
633 assembler->AdvanceRegister(reg, value); 612 assembler->AdvanceRegister(reg, value);
634 } 613 }
635 } 614 }
636 } 615 }
637 616
638
639 // This is called as we come into a loop choice node and some other tricky 617 // This is called as we come into a loop choice node and some other tricky
640 // nodes. It normalizes the state of the code generator to ensure we can 618 // nodes. It normalizes the state of the code generator to ensure we can
641 // generate generic code. 619 // generate generic code.
642 void Trace::Flush(RegExpCompiler* compiler, RegExpNode* successor) { 620 void Trace::Flush(RegExpCompiler* compiler, RegExpNode* successor) {
643 RegExpMacroAssembler* assembler = compiler->macro_assembler(); 621 RegExpMacroAssembler* assembler = compiler->macro_assembler();
644 622
645 ASSERT(!is_trivial()); 623 ASSERT(!is_trivial());
646 624
647 if (actions_ == NULL && backtrack() == NULL) { 625 if (actions_ == NULL && backtrack() == NULL) {
648 // Here we just have some deferred cp advances to fix and we are back to 626 // Here we just have some deferred cp advances to fix and we are back to
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
685 RestoreAffectedRegisters(assembler, max_register, registers_to_pop, 663 RestoreAffectedRegisters(assembler, max_register, registers_to_pop,
686 registers_to_clear); 664 registers_to_clear);
687 if (backtrack() == NULL) { 665 if (backtrack() == NULL) {
688 assembler->Backtrack(); 666 assembler->Backtrack();
689 } else { 667 } else {
690 assembler->PopCurrentPosition(); 668 assembler->PopCurrentPosition();
691 assembler->GoTo(backtrack()); 669 assembler->GoTo(backtrack());
692 } 670 }
693 } 671 }
694 672
695
696 void NegativeSubmatchSuccess::Emit(RegExpCompiler* compiler, Trace* trace) { 673 void NegativeSubmatchSuccess::Emit(RegExpCompiler* compiler, Trace* trace) {
697 RegExpMacroAssembler* assembler = compiler->macro_assembler(); 674 RegExpMacroAssembler* assembler = compiler->macro_assembler();
698 675
699 // Omit flushing the trace. We discard the entire stack frame anyway. 676 // Omit flushing the trace. We discard the entire stack frame anyway.
700 677
701 if (!label()->IsBound()) { 678 if (!label()->IsBound()) {
702 // We are completely independent of the trace, since we ignore it, 679 // We are completely independent of the trace, since we ignore it,
703 // so this code can be used as the generic version. 680 // so this code can be used as the generic version.
704 assembler->BindBlock(label()); 681 assembler->BindBlock(label());
705 } 682 }
706 683
707 // Throw away everything on the backtrack stack since the start 684 // Throw away everything on the backtrack stack since the start
708 // of the negative submatch and restore the character position. 685 // of the negative submatch and restore the character position.
709 assembler->ReadCurrentPositionFromRegister(current_position_register_); 686 assembler->ReadCurrentPositionFromRegister(current_position_register_);
710 assembler->ReadStackPointerFromRegister(stack_pointer_register_); 687 assembler->ReadStackPointerFromRegister(stack_pointer_register_);
711 if (clear_capture_count_ > 0) { 688 if (clear_capture_count_ > 0) {
712 // Clear any captures that might have been performed during the success 689 // Clear any captures that might have been performed during the success
713 // of the body of the negative look-ahead. 690 // of the body of the negative look-ahead.
714 int clear_capture_end = clear_capture_start_ + clear_capture_count_ - 1; 691 int clear_capture_end = clear_capture_start_ + clear_capture_count_ - 1;
715 assembler->ClearRegisters(clear_capture_start_, clear_capture_end); 692 assembler->ClearRegisters(clear_capture_start_, clear_capture_end);
716 } 693 }
717 // Now that we have unwound the stack we find at the top of the stack the 694 // Now that we have unwound the stack we find at the top of the stack the
718 // backtrack that the BeginSubmatch node got. 695 // backtrack that the BeginSubmatch node got.
719 assembler->Backtrack(); 696 assembler->Backtrack();
720 } 697 }
721 698
722
723 void EndNode::Emit(RegExpCompiler* compiler, Trace* trace) { 699 void EndNode::Emit(RegExpCompiler* compiler, Trace* trace) {
724 if (!trace->is_trivial()) { 700 if (!trace->is_trivial()) {
725 trace->Flush(compiler, this); 701 trace->Flush(compiler, this);
726 return; 702 return;
727 } 703 }
728 RegExpMacroAssembler* assembler = compiler->macro_assembler(); 704 RegExpMacroAssembler* assembler = compiler->macro_assembler();
729 if (!label()->IsBound()) { 705 if (!label()->IsBound()) {
730 assembler->BindBlock(label()); 706 assembler->BindBlock(label());
731 } 707 }
732 switch (action_) { 708 switch (action_) {
733 case ACCEPT: 709 case ACCEPT:
734 assembler->Succeed(); 710 assembler->Succeed();
735 return; 711 return;
736 case BACKTRACK: 712 case BACKTRACK:
737 assembler->GoTo(trace->backtrack()); 713 assembler->GoTo(trace->backtrack());
738 return; 714 return;
739 case NEGATIVE_SUBMATCH_SUCCESS: 715 case NEGATIVE_SUBMATCH_SUCCESS:
740 // This case is handled in a different virtual method. 716 // This case is handled in a different virtual method.
741 UNREACHABLE(); 717 UNREACHABLE();
742 } 718 }
743 UNIMPLEMENTED(); 719 UNIMPLEMENTED();
744 } 720 }
745 721
746
747 void GuardedAlternative::AddGuard(Guard* guard, Zone* zone) { 722 void GuardedAlternative::AddGuard(Guard* guard, Zone* zone) {
748 if (guards_ == NULL) guards_ = new (zone) ZoneGrowableArray<Guard*>(1); 723 if (guards_ == NULL) guards_ = new (zone) ZoneGrowableArray<Guard*>(1);
749 guards_->Add(guard); 724 guards_->Add(guard);
750 } 725 }
751 726
752
753 ActionNode* ActionNode::SetRegister(intptr_t reg, 727 ActionNode* ActionNode::SetRegister(intptr_t reg,
754 intptr_t val, 728 intptr_t val,
755 RegExpNode* on_success) { 729 RegExpNode* on_success) {
756 ActionNode* result = 730 ActionNode* result =
757 new (on_success->zone()) ActionNode(SET_REGISTER, on_success); 731 new (on_success->zone()) ActionNode(SET_REGISTER, on_success);
758 result->data_.u_store_register.reg = reg; 732 result->data_.u_store_register.reg = reg;
759 result->data_.u_store_register.value = val; 733 result->data_.u_store_register.value = val;
760 return result; 734 return result;
761 } 735 }
762 736
763
764 ActionNode* ActionNode::IncrementRegister(intptr_t reg, 737 ActionNode* ActionNode::IncrementRegister(intptr_t reg,
765 RegExpNode* on_success) { 738 RegExpNode* on_success) {
766 ActionNode* result = 739 ActionNode* result =
767 new (on_success->zone()) ActionNode(INCREMENT_REGISTER, on_success); 740 new (on_success->zone()) ActionNode(INCREMENT_REGISTER, on_success);
768 result->data_.u_increment_register.reg = reg; 741 result->data_.u_increment_register.reg = reg;
769 return result; 742 return result;
770 } 743 }
771 744
772
773 ActionNode* ActionNode::StorePosition(intptr_t reg, 745 ActionNode* ActionNode::StorePosition(intptr_t reg,
774 bool is_capture, 746 bool is_capture,
775 RegExpNode* on_success) { 747 RegExpNode* on_success) {
776 ActionNode* result = 748 ActionNode* result =
777 new (on_success->zone()) ActionNode(STORE_POSITION, on_success); 749 new (on_success->zone()) ActionNode(STORE_POSITION, on_success);
778 result->data_.u_position_register.reg = reg; 750 result->data_.u_position_register.reg = reg;
779 result->data_.u_position_register.is_capture = is_capture; 751 result->data_.u_position_register.is_capture = is_capture;
780 return result; 752 return result;
781 } 753 }
782 754
783
784 ActionNode* ActionNode::ClearCaptures(Interval range, RegExpNode* on_success) { 755 ActionNode* ActionNode::ClearCaptures(Interval range, RegExpNode* on_success) {
785 ActionNode* result = 756 ActionNode* result =
786 new (on_success->zone()) ActionNode(CLEAR_CAPTURES, on_success); 757 new (on_success->zone()) ActionNode(CLEAR_CAPTURES, on_success);
787 result->data_.u_clear_captures.range_from = range.from(); 758 result->data_.u_clear_captures.range_from = range.from();
788 result->data_.u_clear_captures.range_to = range.to(); 759 result->data_.u_clear_captures.range_to = range.to();
789 return result; 760 return result;
790 } 761 }
791 762
792
793 ActionNode* ActionNode::BeginSubmatch(intptr_t stack_reg, 763 ActionNode* ActionNode::BeginSubmatch(intptr_t stack_reg,
794 intptr_t position_reg, 764 intptr_t position_reg,
795 RegExpNode* on_success) { 765 RegExpNode* on_success) {
796 ActionNode* result = 766 ActionNode* result =
797 new (on_success->zone()) ActionNode(BEGIN_SUBMATCH, on_success); 767 new (on_success->zone()) ActionNode(BEGIN_SUBMATCH, on_success);
798 result->data_.u_submatch.stack_pointer_register = stack_reg; 768 result->data_.u_submatch.stack_pointer_register = stack_reg;
799 result->data_.u_submatch.current_position_register = position_reg; 769 result->data_.u_submatch.current_position_register = position_reg;
800 return result; 770 return result;
801 } 771 }
802 772
803
804 ActionNode* ActionNode::PositiveSubmatchSuccess(intptr_t stack_reg, 773 ActionNode* ActionNode::PositiveSubmatchSuccess(intptr_t stack_reg,
805 intptr_t position_reg, 774 intptr_t position_reg,
806 intptr_t clear_register_count, 775 intptr_t clear_register_count,
807 intptr_t clear_register_from, 776 intptr_t clear_register_from,
808 RegExpNode* on_success) { 777 RegExpNode* on_success) {
809 ActionNode* result = new (on_success->zone()) 778 ActionNode* result = new (on_success->zone())
810 ActionNode(POSITIVE_SUBMATCH_SUCCESS, on_success); 779 ActionNode(POSITIVE_SUBMATCH_SUCCESS, on_success);
811 result->data_.u_submatch.stack_pointer_register = stack_reg; 780 result->data_.u_submatch.stack_pointer_register = stack_reg;
812 result->data_.u_submatch.current_position_register = position_reg; 781 result->data_.u_submatch.current_position_register = position_reg;
813 result->data_.u_submatch.clear_register_count = clear_register_count; 782 result->data_.u_submatch.clear_register_count = clear_register_count;
814 result->data_.u_submatch.clear_register_from = clear_register_from; 783 result->data_.u_submatch.clear_register_from = clear_register_from;
815 return result; 784 return result;
816 } 785 }
817 786
818
819 ActionNode* ActionNode::EmptyMatchCheck(intptr_t start_register, 787 ActionNode* ActionNode::EmptyMatchCheck(intptr_t start_register,
820 intptr_t repetition_register, 788 intptr_t repetition_register,
821 intptr_t repetition_limit, 789 intptr_t repetition_limit,
822 RegExpNode* on_success) { 790 RegExpNode* on_success) {
823 ActionNode* result = 791 ActionNode* result =
824 new (on_success->zone()) ActionNode(EMPTY_MATCH_CHECK, on_success); 792 new (on_success->zone()) ActionNode(EMPTY_MATCH_CHECK, on_success);
825 result->data_.u_empty_match_check.start_register = start_register; 793 result->data_.u_empty_match_check.start_register = start_register;
826 result->data_.u_empty_match_check.repetition_register = repetition_register; 794 result->data_.u_empty_match_check.repetition_register = repetition_register;
827 result->data_.u_empty_match_check.repetition_limit = repetition_limit; 795 result->data_.u_empty_match_check.repetition_limit = repetition_limit;
828 return result; 796 return result;
829 } 797 }
830 798
831
832 #define DEFINE_ACCEPT(Type) \ 799 #define DEFINE_ACCEPT(Type) \
833 void Type##Node::Accept(NodeVisitor* visitor) { visitor->Visit##Type(this); } 800 void Type##Node::Accept(NodeVisitor* visitor) { visitor->Visit##Type(this); }
834 FOR_EACH_NODE_TYPE(DEFINE_ACCEPT) 801 FOR_EACH_NODE_TYPE(DEFINE_ACCEPT)
835 #undef DEFINE_ACCEPT 802 #undef DEFINE_ACCEPT
836 803
837
838 void LoopChoiceNode::Accept(NodeVisitor* visitor) { 804 void LoopChoiceNode::Accept(NodeVisitor* visitor) {
839 visitor->VisitLoopChoice(this); 805 visitor->VisitLoopChoice(this);
840 } 806 }
841 807
842
843 // ------------------------------------------------------------------- 808 // -------------------------------------------------------------------
844 // Emit code. 809 // Emit code.
845 810
846
847 void ChoiceNode::GenerateGuard(RegExpMacroAssembler* macro_assembler, 811 void ChoiceNode::GenerateGuard(RegExpMacroAssembler* macro_assembler,
848 Guard* guard, 812 Guard* guard,
849 Trace* trace) { 813 Trace* trace) {
850 switch (guard->op()) { 814 switch (guard->op()) {
851 case Guard::LT: 815 case Guard::LT:
852 ASSERT(!trace->mentions_reg(guard->reg())); 816 ASSERT(!trace->mentions_reg(guard->reg()));
853 macro_assembler->IfRegisterGE(guard->reg(), guard->value(), 817 macro_assembler->IfRegisterGE(guard->reg(), guard->value(),
854 trace->backtrack()); 818 trace->backtrack());
855 break; 819 break;
856 case Guard::GEQ: 820 case Guard::GEQ:
857 ASSERT(!trace->mentions_reg(guard->reg())); 821 ASSERT(!trace->mentions_reg(guard->reg()));
858 macro_assembler->IfRegisterLT(guard->reg(), guard->value(), 822 macro_assembler->IfRegisterLT(guard->reg(), guard->value(),
859 trace->backtrack()); 823 trace->backtrack());
860 break; 824 break;
861 } 825 }
862 } 826 }
863 827
864
865 // Returns the number of characters in the equivalence class, omitting those 828 // Returns the number of characters in the equivalence class, omitting those
866 // that cannot occur in the source string because it is ASCII. 829 // that cannot occur in the source string because it is ASCII.
867 static intptr_t GetCaseIndependentLetters(uint16_t character, 830 static intptr_t GetCaseIndependentLetters(uint16_t character,
868 bool one_byte_subject, 831 bool one_byte_subject,
869 int32_t* letters) { 832 int32_t* letters) {
870 unibrow::Mapping<unibrow::Ecma262UnCanonicalize> jsregexp_uncanonicalize; 833 unibrow::Mapping<unibrow::Ecma262UnCanonicalize> jsregexp_uncanonicalize;
871 intptr_t length = jsregexp_uncanonicalize.get(character, '\0', letters); 834 intptr_t length = jsregexp_uncanonicalize.get(character, '\0', letters);
872 // Unibrow returns 0 or 1 for characters where case independence is 835 // Unibrow returns 0 or 1 for characters where case independence is
873 // trivial. 836 // trivial.
874 if (length == 0) { 837 if (length == 0) {
875 letters[0] = character; 838 letters[0] = character;
876 length = 1; 839 length = 1;
877 } 840 }
878 if (!one_byte_subject || character <= Symbols::kMaxOneCharCodeSymbol) { 841 if (!one_byte_subject || character <= Symbols::kMaxOneCharCodeSymbol) {
879 return length; 842 return length;
880 } 843 }
881 844
882 // The standard requires that non-ASCII characters cannot have ASCII 845 // The standard requires that non-ASCII characters cannot have ASCII
883 // character codes in their equivalence class. 846 // character codes in their equivalence class.
884 // TODO(dcarney): issue 3550 this is not actually true for Latin1 anymore, 847 // TODO(dcarney): issue 3550 this is not actually true for Latin1 anymore,
885 // is it? For example, \u00C5 is equivalent to \u212B. 848 // is it? For example, \u00C5 is equivalent to \u212B.
886 return 0; 849 return 0;
887 } 850 }
888 851
889
890 static inline bool EmitSimpleCharacter(Zone* zone, 852 static inline bool EmitSimpleCharacter(Zone* zone,
891 RegExpCompiler* compiler, 853 RegExpCompiler* compiler,
892 uint16_t c, 854 uint16_t c,
893 BlockLabel* on_failure, 855 BlockLabel* on_failure,
894 intptr_t cp_offset, 856 intptr_t cp_offset,
895 bool check, 857 bool check,
896 bool preloaded) { 858 bool preloaded) {
897 RegExpMacroAssembler* assembler = compiler->macro_assembler(); 859 RegExpMacroAssembler* assembler = compiler->macro_assembler();
898 bool bound_checked = false; 860 bool bound_checked = false;
899 if (!preloaded) { 861 if (!preloaded) {
900 assembler->LoadCurrentCharacter(cp_offset, on_failure, check); 862 assembler->LoadCurrentCharacter(cp_offset, on_failure, check);
901 bound_checked = true; 863 bound_checked = true;
902 } 864 }
903 assembler->CheckNotCharacter(c, on_failure); 865 assembler->CheckNotCharacter(c, on_failure);
904 return bound_checked; 866 return bound_checked;
905 } 867 }
906 868
907
908 // Only emits non-letters (things that don't have case). Only used for case 869 // Only emits non-letters (things that don't have case). Only used for case
909 // independent matches. 870 // independent matches.
910 static inline bool EmitAtomNonLetter(Zone* zone, 871 static inline bool EmitAtomNonLetter(Zone* zone,
911 RegExpCompiler* compiler, 872 RegExpCompiler* compiler,
912 uint16_t c, 873 uint16_t c,
913 BlockLabel* on_failure, 874 BlockLabel* on_failure,
914 intptr_t cp_offset, 875 intptr_t cp_offset,
915 bool check, 876 bool check,
916 bool preloaded) { 877 bool preloaded) {
917 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); 878 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
(...skipping 15 matching lines...) Expand all
933 } 894 }
934 if (!preloaded) { 895 if (!preloaded) {
935 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check); 896 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);
936 checked = check; 897 checked = check;
937 } 898 }
938 macro_assembler->CheckNotCharacter(c, on_failure); 899 macro_assembler->CheckNotCharacter(c, on_failure);
939 } 900 }
940 return checked; 901 return checked;
941 } 902 }
942 903
943
944 static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler, 904 static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler,
945 bool one_byte, 905 bool one_byte,
946 uint16_t c1, 906 uint16_t c1,
947 uint16_t c2, 907 uint16_t c2,
948 BlockLabel* on_failure) { 908 BlockLabel* on_failure) {
949 uint16_t char_mask; 909 uint16_t char_mask;
950 if (one_byte) { 910 if (one_byte) {
951 char_mask = Symbols::kMaxOneCharCodeSymbol; 911 char_mask = Symbols::kMaxOneCharCodeSymbol;
952 } else { 912 } else {
953 char_mask = Utf16::kMaxCodeUnit; 913 char_mask = Utf16::kMaxCodeUnit;
(...skipping 16 matching lines...) Expand all
970 // trick. We avoid the theoretical case where negative numbers are 930 // trick. We avoid the theoretical case where negative numbers are
971 // involved in order to simplify code generation. 931 // involved in order to simplify code generation.
972 uint16_t mask = char_mask ^ diff; 932 uint16_t mask = char_mask ^ diff;
973 macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff, diff, mask, 933 macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff, diff, mask,
974 on_failure); 934 on_failure);
975 return true; 935 return true;
976 } 936 }
977 return false; 937 return false;
978 } 938 }
979 939
980
981 typedef bool EmitCharacterFunction(Zone* zone, 940 typedef bool EmitCharacterFunction(Zone* zone,
982 RegExpCompiler* compiler, 941 RegExpCompiler* compiler,
983 uint16_t c, 942 uint16_t c,
984 BlockLabel* on_failure, 943 BlockLabel* on_failure,
985 intptr_t cp_offset, 944 intptr_t cp_offset,
986 bool check, 945 bool check,
987 bool preloaded); 946 bool preloaded);
988 947
989 // Only emits letters (things that have case). Only used for case independent 948 // Only emits letters (things that have case). Only used for case independent
990 // matches. 949 // matches.
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
1027 macro_assembler->CheckNotCharacter(chars[2], on_failure); 986 macro_assembler->CheckNotCharacter(chars[2], on_failure);
1028 macro_assembler->BindBlock(&ok); 987 macro_assembler->BindBlock(&ok);
1029 break; 988 break;
1030 default: 989 default:
1031 UNREACHABLE(); 990 UNREACHABLE();
1032 break; 991 break;
1033 } 992 }
1034 return true; 993 return true;
1035 } 994 }
1036 995
1037
1038 static void EmitBoundaryTest(RegExpMacroAssembler* masm, 996 static void EmitBoundaryTest(RegExpMacroAssembler* masm,
1039 intptr_t border, 997 intptr_t border,
1040 BlockLabel* fall_through, 998 BlockLabel* fall_through,
1041 BlockLabel* above_or_equal, 999 BlockLabel* above_or_equal,
1042 BlockLabel* below) { 1000 BlockLabel* below) {
1043 if (below != fall_through) { 1001 if (below != fall_through) {
1044 masm->CheckCharacterLT(border, below); 1002 masm->CheckCharacterLT(border, below);
1045 if (above_or_equal != fall_through) masm->GoTo(above_or_equal); 1003 if (above_or_equal != fall_through) masm->GoTo(above_or_equal);
1046 } else { 1004 } else {
1047 masm->CheckCharacterGT(border - 1, above_or_equal); 1005 masm->CheckCharacterGT(border - 1, above_or_equal);
1048 } 1006 }
1049 } 1007 }
1050 1008
1051
1052 static void EmitDoubleBoundaryTest(RegExpMacroAssembler* masm, 1009 static void EmitDoubleBoundaryTest(RegExpMacroAssembler* masm,
1053 intptr_t first, 1010 intptr_t first,
1054 intptr_t last, 1011 intptr_t last,
1055 BlockLabel* fall_through, 1012 BlockLabel* fall_through,
1056 BlockLabel* in_range, 1013 BlockLabel* in_range,
1057 BlockLabel* out_of_range) { 1014 BlockLabel* out_of_range) {
1058 if (in_range == fall_through) { 1015 if (in_range == fall_through) {
1059 if (first == last) { 1016 if (first == last) {
1060 masm->CheckNotCharacter(first, out_of_range); 1017 masm->CheckNotCharacter(first, out_of_range);
1061 } else { 1018 } else {
1062 masm->CheckCharacterNotInRange(first, last, out_of_range); 1019 masm->CheckCharacterNotInRange(first, last, out_of_range);
1063 } 1020 }
1064 } else { 1021 } else {
1065 if (first == last) { 1022 if (first == last) {
1066 masm->CheckCharacter(first, in_range); 1023 masm->CheckCharacter(first, in_range);
1067 } else { 1024 } else {
1068 masm->CheckCharacterInRange(first, last, in_range); 1025 masm->CheckCharacterInRange(first, last, in_range);
1069 } 1026 }
1070 if (out_of_range != fall_through) masm->GoTo(out_of_range); 1027 if (out_of_range != fall_through) masm->GoTo(out_of_range);
1071 } 1028 }
1072 } 1029 }
1073 1030
1074
1075 // even_label is for ranges[i] to ranges[i + 1] where i - start_index is even. 1031 // even_label is for ranges[i] to ranges[i + 1] where i - start_index is even.
1076 // odd_label is for ranges[i] to ranges[i + 1] where i - start_index is odd. 1032 // odd_label is for ranges[i] to ranges[i + 1] where i - start_index is odd.
1077 static void EmitUseLookupTable(RegExpMacroAssembler* masm, 1033 static void EmitUseLookupTable(RegExpMacroAssembler* masm,
1078 ZoneGrowableArray<int>* ranges, 1034 ZoneGrowableArray<int>* ranges,
1079 intptr_t start_index, 1035 intptr_t start_index,
1080 intptr_t end_index, 1036 intptr_t end_index,
1081 intptr_t min_char, 1037 intptr_t min_char,
1082 BlockLabel* fall_through, 1038 BlockLabel* fall_through,
1083 BlockLabel* even_label, 1039 BlockLabel* even_label,
1084 BlockLabel* odd_label) { 1040 BlockLabel* odd_label) {
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after
1124 // TODO(erikcorry): Cache these. 1080 // TODO(erikcorry): Cache these.
1125 const TypedData& ba = TypedData::ZoneHandle( 1081 const TypedData& ba = TypedData::ZoneHandle(
1126 masm->zone(), TypedData::New(kTypedDataUint8ArrayCid, kSize, Heap::kOld)); 1082 masm->zone(), TypedData::New(kTypedDataUint8ArrayCid, kSize, Heap::kOld));
1127 for (intptr_t i = 0; i < kSize; i++) { 1083 for (intptr_t i = 0; i < kSize; i++) {
1128 ba.SetUint8(i, templ[i]); 1084 ba.SetUint8(i, templ[i]);
1129 } 1085 }
1130 masm->CheckBitInTable(ba, on_bit_set); 1086 masm->CheckBitInTable(ba, on_bit_set);
1131 if (on_bit_clear != fall_through) masm->GoTo(on_bit_clear); 1087 if (on_bit_clear != fall_through) masm->GoTo(on_bit_clear);
1132 } 1088 }
1133 1089
1134
1135 static void CutOutRange(RegExpMacroAssembler* masm, 1090 static void CutOutRange(RegExpMacroAssembler* masm,
1136 ZoneGrowableArray<int>* ranges, 1091 ZoneGrowableArray<int>* ranges,
1137 intptr_t start_index, 1092 intptr_t start_index,
1138 intptr_t end_index, 1093 intptr_t end_index,
1139 intptr_t cut_index, 1094 intptr_t cut_index,
1140 BlockLabel* even_label, 1095 BlockLabel* even_label,
1141 BlockLabel* odd_label) { 1096 BlockLabel* odd_label) {
1142 bool odd = (((cut_index - start_index) & 1) == 1); 1097 bool odd = (((cut_index - start_index) & 1) == 1);
1143 BlockLabel* in_range_label = odd ? odd_label : even_label; 1098 BlockLabel* in_range_label = odd ? odd_label : even_label;
1144 BlockLabel dummy; 1099 BlockLabel dummy;
1145 EmitDoubleBoundaryTest(masm, ranges->At(cut_index), 1100 EmitDoubleBoundaryTest(masm, ranges->At(cut_index),
1146 ranges->At(cut_index + 1) - 1, &dummy, in_range_label, 1101 ranges->At(cut_index + 1) - 1, &dummy, in_range_label,
1147 &dummy); 1102 &dummy);
1148 ASSERT(!dummy.IsLinked()); 1103 ASSERT(!dummy.IsLinked());
1149 // Cut out the single range by rewriting the array. This creates a new 1104 // Cut out the single range by rewriting the array. This creates a new
1150 // range that is a merger of the two ranges on either side of the one we 1105 // range that is a merger of the two ranges on either side of the one we
1151 // are cutting out. The oddity of the labels is preserved. 1106 // are cutting out. The oddity of the labels is preserved.
1152 for (intptr_t j = cut_index; j > start_index; j--) { 1107 for (intptr_t j = cut_index; j > start_index; j--) {
1153 (*ranges)[j] = ranges->At(j - 1); 1108 (*ranges)[j] = ranges->At(j - 1);
1154 } 1109 }
1155 for (intptr_t j = cut_index + 1; j < end_index; j++) { 1110 for (intptr_t j = cut_index + 1; j < end_index; j++) {
1156 (*ranges)[j] = ranges->At(j + 1); 1111 (*ranges)[j] = ranges->At(j + 1);
1157 } 1112 }
1158 } 1113 }
1159 1114
1160
1161 // Unicode case. Split the search space into kSize spaces that are handled 1115 // Unicode case. Split the search space into kSize spaces that are handled
1162 // with recursion. 1116 // with recursion.
1163 static void SplitSearchSpace(ZoneGrowableArray<int>* ranges, 1117 static void SplitSearchSpace(ZoneGrowableArray<int>* ranges,
1164 intptr_t start_index, 1118 intptr_t start_index,
1165 intptr_t end_index, 1119 intptr_t end_index,
1166 intptr_t* new_start_index, 1120 intptr_t* new_start_index,
1167 intptr_t* new_end_index, 1121 intptr_t* new_end_index,
1168 intptr_t* border) { 1122 intptr_t* border) {
1169 static const intptr_t kSize = RegExpMacroAssembler::kTableSize; 1123 static const intptr_t kSize = RegExpMacroAssembler::kTableSize;
1170 static const intptr_t kMask = RegExpMacroAssembler::kTableMask; 1124 static const intptr_t kMask = RegExpMacroAssembler::kTableMask;
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after
1216 if (ranges->At(*new_end_index) == *border) { 1170 if (ranges->At(*new_end_index) == *border) {
1217 (*new_end_index)--; 1171 (*new_end_index)--;
1218 } 1172 }
1219 if (*border >= ranges->At(end_index)) { 1173 if (*border >= ranges->At(end_index)) {
1220 *border = ranges->At(end_index); 1174 *border = ranges->At(end_index);
1221 *new_start_index = end_index; // Won't be used. 1175 *new_start_index = end_index; // Won't be used.
1222 *new_end_index = end_index - 1; 1176 *new_end_index = end_index - 1;
1223 } 1177 }
1224 } 1178 }
1225 1179
1226
1227 // Gets a series of segment boundaries representing a character class. If the 1180 // Gets a series of segment boundaries representing a character class. If the
1228 // character is in the range between an even and an odd boundary (counting from 1181 // character is in the range between an even and an odd boundary (counting from
1229 // start_index) then go to even_label, otherwise go to odd_label. We already 1182 // start_index) then go to even_label, otherwise go to odd_label. We already
1230 // know that the character is in the range of min_char to max_char inclusive. 1183 // know that the character is in the range of min_char to max_char inclusive.
1231 // Either label can be NULL indicating backtracking. Either label can also be 1184 // Either label can be NULL indicating backtracking. Either label can also be
1232 // equal to the fall_through label. 1185 // equal to the fall_through label.
1233 static void GenerateBranches(RegExpMacroAssembler* masm, 1186 static void GenerateBranches(RegExpMacroAssembler* masm,
1234 ZoneGrowableArray<int>* ranges, 1187 ZoneGrowableArray<int>* ranges,
1235 intptr_t start_index, 1188 intptr_t start_index,
1236 intptr_t end_index, 1189 intptr_t end_index,
(...skipping 100 matching lines...) Expand 10 before | Expand all | Expand 10 after
1337 1290
1338 if (handle_rest.IsLinked()) { 1291 if (handle_rest.IsLinked()) {
1339 masm->BindBlock(&handle_rest); 1292 masm->BindBlock(&handle_rest);
1340 bool flip = (new_start_index & 1) != (start_index & 1); 1293 bool flip = (new_start_index & 1) != (start_index & 1);
1341 GenerateBranches(masm, ranges, new_start_index, end_index, border, max_char, 1294 GenerateBranches(masm, ranges, new_start_index, end_index, border, max_char,
1342 &dummy, flip ? odd_label : even_label, 1295 &dummy, flip ? odd_label : even_label,
1343 flip ? even_label : odd_label); 1296 flip ? even_label : odd_label);
1344 } 1297 }
1345 } 1298 }
1346 1299
1347
1348 static void EmitCharClass(RegExpMacroAssembler* macro_assembler, 1300 static void EmitCharClass(RegExpMacroAssembler* macro_assembler,
1349 RegExpCharacterClass* cc, 1301 RegExpCharacterClass* cc,
1350 bool one_byte, 1302 bool one_byte,
1351 BlockLabel* on_failure, 1303 BlockLabel* on_failure,
1352 intptr_t cp_offset, 1304 intptr_t cp_offset,
1353 bool check_offset, 1305 bool check_offset,
1354 bool preloaded, 1306 bool preloaded,
1355 Zone* zone) { 1307 Zone* zone) {
1356 ZoneGrowableArray<CharacterRange>* ranges = cc->ranges(); 1308 ZoneGrowableArray<CharacterRange>* ranges = cc->ranges();
1357 if (!CharacterRange::IsCanonical(ranges)) { 1309 if (!CharacterRange::IsCanonical(ranges)) {
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after
1403 if (check_offset) { 1355 if (check_offset) {
1404 macro_assembler->CheckPosition(cp_offset, on_failure); 1356 macro_assembler->CheckPosition(cp_offset, on_failure);
1405 } 1357 }
1406 return; 1358 return;
1407 } 1359 }
1408 1360
1409 if (!preloaded) { 1361 if (!preloaded) {
1410 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check_offset); 1362 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check_offset);
1411 } 1363 }
1412 1364
1413 if (cc->is_standard() && 1365 if (cc->is_standard() && macro_assembler->CheckSpecialCharacterClass(
1414 macro_assembler->CheckSpecialCharacterClass(cc->standard_type(), 1366 cc->standard_type(), on_failure)) {
1415 on_failure)) {
1416 return; 1367 return;
1417 } 1368 }
1418 1369
1419
1420 // A new list with ascending entries. Each entry is a code unit 1370 // A new list with ascending entries. Each entry is a code unit
1421 // where there is a boundary between code units that are part of 1371 // where there is a boundary between code units that are part of
1422 // the class and code units that are not. Normally we insert an 1372 // the class and code units that are not. Normally we insert an
1423 // entry at zero which goes to the failure label, but if there 1373 // entry at zero which goes to the failure label, but if there
1424 // was already one there we fall through for success on that entry. 1374 // was already one there we fall through for success on that entry.
1425 // Subsequent entries have alternating meaning (success/failure). 1375 // Subsequent entries have alternating meaning (success/failure).
1426 ZoneGrowableArray<int>* range_boundaries = 1376 ZoneGrowableArray<int>* range_boundaries =
1427 new (zone) ZoneGrowableArray<int>(last_valid_range); 1377 new (zone) ZoneGrowableArray<int>(last_valid_range);
1428 1378
1429 bool zeroth_entry_is_failure = !cc->is_negated(); 1379 bool zeroth_entry_is_failure = !cc->is_negated();
(...skipping 17 matching lines...) Expand all
1447 GenerateBranches(macro_assembler, range_boundaries, 1397 GenerateBranches(macro_assembler, range_boundaries,
1448 0, // start_index. 1398 0, // start_index.
1449 end_index, 1399 end_index,
1450 0, // min_char. 1400 0, // min_char.
1451 max_char, &fall_through, 1401 max_char, &fall_through,
1452 zeroth_entry_is_failure ? &fall_through : on_failure, 1402 zeroth_entry_is_failure ? &fall_through : on_failure,
1453 zeroth_entry_is_failure ? on_failure : &fall_through); 1403 zeroth_entry_is_failure ? on_failure : &fall_through);
1454 macro_assembler->BindBlock(&fall_through); 1404 macro_assembler->BindBlock(&fall_through);
1455 } 1405 }
1456 1406
1457
1458 RegExpNode::~RegExpNode() {} 1407 RegExpNode::~RegExpNode() {}
1459 1408
1460
1461 RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler, 1409 RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler,
1462 Trace* trace) { 1410 Trace* trace) {
1463 // If we are generating a greedy loop then don't stop and don't reuse code. 1411 // If we are generating a greedy loop then don't stop and don't reuse code.
1464 if (trace->stop_node() != NULL) { 1412 if (trace->stop_node() != NULL) {
1465 return CONTINUE; 1413 return CONTINUE;
1466 } 1414 }
1467 1415
1468 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); 1416 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
1469 if (trace->is_trivial()) { 1417 if (trace->is_trivial()) {
1470 if (label_.IsBound()) { 1418 if (label_.IsBound()) {
(...skipping 22 matching lines...) Expand all
1493 return CONTINUE; 1441 return CONTINUE;
1494 } 1442 }
1495 1443
1496 // If we get here code has been generated for this node too many times or 1444 // If we get here code has been generated for this node too many times or
1497 // recursion is too deep. Time to switch to a generic version. The code for 1445 // recursion is too deep. Time to switch to a generic version. The code for
1498 // generic versions above can handle deep recursion properly. 1446 // generic versions above can handle deep recursion properly.
1499 trace->Flush(compiler, this); 1447 trace->Flush(compiler, this);
1500 return DONE; 1448 return DONE;
1501 } 1449 }
1502 1450
1503
1504 intptr_t ActionNode::EatsAtLeast(intptr_t still_to_find, 1451 intptr_t ActionNode::EatsAtLeast(intptr_t still_to_find,
1505 intptr_t budget, 1452 intptr_t budget,
1506 bool not_at_start) { 1453 bool not_at_start) {
1507 if (budget <= 0) return 0; 1454 if (budget <= 0) return 0;
1508 if (action_type_ == POSITIVE_SUBMATCH_SUCCESS) return 0; // Rewinds input! 1455 if (action_type_ == POSITIVE_SUBMATCH_SUCCESS) return 0; // Rewinds input!
1509 return on_success()->EatsAtLeast(still_to_find, budget - 1, not_at_start); 1456 return on_success()->EatsAtLeast(still_to_find, budget - 1, not_at_start);
1510 } 1457 }
1511 1458
1512
1513 void ActionNode::FillInBMInfo(intptr_t offset, 1459 void ActionNode::FillInBMInfo(intptr_t offset,
1514 intptr_t budget, 1460 intptr_t budget,
1515 BoyerMooreLookahead* bm, 1461 BoyerMooreLookahead* bm,
1516 bool not_at_start) { 1462 bool not_at_start) {
1517 if (action_type_ == BEGIN_SUBMATCH) { 1463 if (action_type_ == BEGIN_SUBMATCH) {
1518 bm->SetRest(offset); 1464 bm->SetRest(offset);
1519 } else if (action_type_ != POSITIVE_SUBMATCH_SUCCESS) { 1465 } else if (action_type_ != POSITIVE_SUBMATCH_SUCCESS) {
1520 on_success()->FillInBMInfo(offset, budget - 1, bm, not_at_start); 1466 on_success()->FillInBMInfo(offset, budget - 1, bm, not_at_start);
1521 } 1467 }
1522 SaveBMInfo(bm, not_at_start, offset); 1468 SaveBMInfo(bm, not_at_start, offset);
1523 } 1469 }
1524 1470
1525
1526 intptr_t AssertionNode::EatsAtLeast(intptr_t still_to_find, 1471 intptr_t AssertionNode::EatsAtLeast(intptr_t still_to_find,
1527 intptr_t budget, 1472 intptr_t budget,
1528 bool not_at_start) { 1473 bool not_at_start) {
1529 if (budget <= 0) return 0; 1474 if (budget <= 0) return 0;
1530 // If we know we are not at the start and we are asked "how many characters 1475 // If we know we are not at the start and we are asked "how many characters
1531 // will you match if you succeed?" then we can answer anything since false 1476 // will you match if you succeed?" then we can answer anything since false
1532 // implies false. So lets just return the max answer (still_to_find) since 1477 // implies false. So lets just return the max answer (still_to_find) since
1533 // that won't prevent us from preloading a lot of characters for the other 1478 // that won't prevent us from preloading a lot of characters for the other
1534 // branches in the node graph. 1479 // branches in the node graph.
1535 if (assertion_type() == AT_START && not_at_start) return still_to_find; 1480 if (assertion_type() == AT_START && not_at_start) return still_to_find;
1536 return on_success()->EatsAtLeast(still_to_find, budget - 1, not_at_start); 1481 return on_success()->EatsAtLeast(still_to_find, budget - 1, not_at_start);
1537 } 1482 }
1538 1483
1539
1540 void AssertionNode::FillInBMInfo(intptr_t offset, 1484 void AssertionNode::FillInBMInfo(intptr_t offset,
1541 intptr_t budget, 1485 intptr_t budget,
1542 BoyerMooreLookahead* bm, 1486 BoyerMooreLookahead* bm,
1543 bool not_at_start) { 1487 bool not_at_start) {
1544 // Match the behaviour of EatsAtLeast on this node. 1488 // Match the behaviour of EatsAtLeast on this node.
1545 if (assertion_type() == AT_START && not_at_start) return; 1489 if (assertion_type() == AT_START && not_at_start) return;
1546 on_success()->FillInBMInfo(offset, budget - 1, bm, not_at_start); 1490 on_success()->FillInBMInfo(offset, budget - 1, bm, not_at_start);
1547 SaveBMInfo(bm, not_at_start, offset); 1491 SaveBMInfo(bm, not_at_start, offset);
1548 } 1492 }
1549 1493
1550
1551 intptr_t BackReferenceNode::EatsAtLeast(intptr_t still_to_find, 1494 intptr_t BackReferenceNode::EatsAtLeast(intptr_t still_to_find,
1552 intptr_t budget, 1495 intptr_t budget,
1553 bool not_at_start) { 1496 bool not_at_start) {
1554 if (budget <= 0) return 0; 1497 if (budget <= 0) return 0;
1555 return on_success()->EatsAtLeast(still_to_find, budget - 1, not_at_start); 1498 return on_success()->EatsAtLeast(still_to_find, budget - 1, not_at_start);
1556 } 1499 }
1557 1500
1558
1559 intptr_t TextNode::EatsAtLeast(intptr_t still_to_find, 1501 intptr_t TextNode::EatsAtLeast(intptr_t still_to_find,
1560 intptr_t budget, 1502 intptr_t budget,
1561 bool not_at_start) { 1503 bool not_at_start) {
1562 intptr_t answer = Length(); 1504 intptr_t answer = Length();
1563 if (answer >= still_to_find) return answer; 1505 if (answer >= still_to_find) return answer;
1564 if (budget <= 0) return answer; 1506 if (budget <= 0) return answer;
1565 // We are not at start after this node so we set the last argument to 'true'. 1507 // We are not at start after this node so we set the last argument to 'true'.
1566 return answer + 1508 return answer +
1567 on_success()->EatsAtLeast(still_to_find - answer, budget - 1, true); 1509 on_success()->EatsAtLeast(still_to_find - answer, budget - 1, true);
1568 } 1510 }
1569 1511
1570
1571 intptr_t NegativeLookaheadChoiceNode::EatsAtLeast(intptr_t still_to_find, 1512 intptr_t NegativeLookaheadChoiceNode::EatsAtLeast(intptr_t still_to_find,
1572 intptr_t budget, 1513 intptr_t budget,
1573 bool not_at_start) { 1514 bool not_at_start) {
1574 if (budget <= 0) return 0; 1515 if (budget <= 0) return 0;
1575 // Alternative 0 is the negative lookahead, alternative 1 is what comes 1516 // Alternative 0 is the negative lookahead, alternative 1 is what comes
1576 // afterwards. 1517 // afterwards.
1577 RegExpNode* node = (*alternatives_)[1].node(); 1518 RegExpNode* node = (*alternatives_)[1].node();
1578 return node->EatsAtLeast(still_to_find, budget - 1, not_at_start); 1519 return node->EatsAtLeast(still_to_find, budget - 1, not_at_start);
1579 } 1520 }
1580 1521
1581
1582 void NegativeLookaheadChoiceNode::GetQuickCheckDetails( 1522 void NegativeLookaheadChoiceNode::GetQuickCheckDetails(
1583 QuickCheckDetails* details, 1523 QuickCheckDetails* details,
1584 RegExpCompiler* compiler, 1524 RegExpCompiler* compiler,
1585 intptr_t filled_in, 1525 intptr_t filled_in,
1586 bool not_at_start) { 1526 bool not_at_start) {
1587 // Alternative 0 is the negative lookahead, alternative 1 is what comes 1527 // Alternative 0 is the negative lookahead, alternative 1 is what comes
1588 // afterwards. 1528 // afterwards.
1589 RegExpNode* node = (*alternatives_)[1].node(); 1529 RegExpNode* node = (*alternatives_)[1].node();
1590 return node->GetQuickCheckDetails(details, compiler, filled_in, not_at_start); 1530 return node->GetQuickCheckDetails(details, compiler, filled_in, not_at_start);
1591 } 1531 }
1592 1532
1593
1594 intptr_t ChoiceNode::EatsAtLeastHelper(intptr_t still_to_find, 1533 intptr_t ChoiceNode::EatsAtLeastHelper(intptr_t still_to_find,
1595 intptr_t budget, 1534 intptr_t budget,
1596 RegExpNode* ignore_this_node, 1535 RegExpNode* ignore_this_node,
1597 bool not_at_start) { 1536 bool not_at_start) {
1598 if (budget <= 0) return 0; 1537 if (budget <= 0) return 0;
1599 intptr_t min = 100; 1538 intptr_t min = 100;
1600 intptr_t choice_count = alternatives_->length(); 1539 intptr_t choice_count = alternatives_->length();
1601 budget = (budget - 1) / choice_count; 1540 budget = (budget - 1) / choice_count;
1602 for (intptr_t i = 0; i < choice_count; i++) { 1541 for (intptr_t i = 0; i < choice_count; i++) {
1603 RegExpNode* node = (*alternatives_)[i].node(); 1542 RegExpNode* node = (*alternatives_)[i].node();
1604 if (node == ignore_this_node) continue; 1543 if (node == ignore_this_node) continue;
1605 intptr_t node_eats_at_least = 1544 intptr_t node_eats_at_least =
1606 node->EatsAtLeast(still_to_find, budget, not_at_start); 1545 node->EatsAtLeast(still_to_find, budget, not_at_start);
1607 if (node_eats_at_least < min) min = node_eats_at_least; 1546 if (node_eats_at_least < min) min = node_eats_at_least;
1608 if (min == 0) return 0; 1547 if (min == 0) return 0;
1609 } 1548 }
1610 return min; 1549 return min;
1611 } 1550 }
1612 1551
1613
1614 intptr_t LoopChoiceNode::EatsAtLeast(intptr_t still_to_find, 1552 intptr_t LoopChoiceNode::EatsAtLeast(intptr_t still_to_find,
1615 intptr_t budget, 1553 intptr_t budget,
1616 bool not_at_start) { 1554 bool not_at_start) {
1617 return EatsAtLeastHelper(still_to_find, budget - 1, loop_node_, not_at_start); 1555 return EatsAtLeastHelper(still_to_find, budget - 1, loop_node_, not_at_start);
1618 } 1556 }
1619 1557
1620
1621 intptr_t ChoiceNode::EatsAtLeast(intptr_t still_to_find, 1558 intptr_t ChoiceNode::EatsAtLeast(intptr_t still_to_find,
1622 intptr_t budget, 1559 intptr_t budget,
1623 bool not_at_start) { 1560 bool not_at_start) {
1624 return EatsAtLeastHelper(still_to_find, budget, NULL, not_at_start); 1561 return EatsAtLeastHelper(still_to_find, budget, NULL, not_at_start);
1625 } 1562 }
1626 1563
1627
1628 // Takes the left-most 1-bit and smears it out, setting all bits to its right. 1564 // Takes the left-most 1-bit and smears it out, setting all bits to its right.
1629 static inline uint32_t SmearBitsRight(uint32_t v) { 1565 static inline uint32_t SmearBitsRight(uint32_t v) {
1630 v |= v >> 1; 1566 v |= v >> 1;
1631 v |= v >> 2; 1567 v |= v >> 2;
1632 v |= v >> 4; 1568 v |= v >> 4;
1633 v |= v >> 8; 1569 v |= v >> 8;
1634 v |= v >> 16; 1570 v |= v >> 16;
1635 return v; 1571 return v;
1636 } 1572 }
1637 1573
1638
1639 bool QuickCheckDetails::Rationalize(bool asc) { 1574 bool QuickCheckDetails::Rationalize(bool asc) {
1640 bool found_useful_op = false; 1575 bool found_useful_op = false;
1641 uint32_t char_mask; 1576 uint32_t char_mask;
1642 if (asc) { 1577 if (asc) {
1643 char_mask = Symbols::kMaxOneCharCodeSymbol; 1578 char_mask = Symbols::kMaxOneCharCodeSymbol;
1644 } else { 1579 } else {
1645 char_mask = Utf16::kMaxCodeUnit; 1580 char_mask = Utf16::kMaxCodeUnit;
1646 } 1581 }
1647 mask_ = 0; 1582 mask_ = 0;
1648 value_ = 0; 1583 value_ = 0;
1649 intptr_t char_shift = 0; 1584 intptr_t char_shift = 0;
1650 for (intptr_t i = 0; i < characters_; i++) { 1585 for (intptr_t i = 0; i < characters_; i++) {
1651 Position* pos = &positions_[i]; 1586 Position* pos = &positions_[i];
1652 if ((pos->mask & Symbols::kMaxOneCharCodeSymbol) != 0) { 1587 if ((pos->mask & Symbols::kMaxOneCharCodeSymbol) != 0) {
1653 found_useful_op = true; 1588 found_useful_op = true;
1654 } 1589 }
1655 mask_ |= (pos->mask & char_mask) << char_shift; 1590 mask_ |= (pos->mask & char_mask) << char_shift;
1656 value_ |= (pos->value & char_mask) << char_shift; 1591 value_ |= (pos->value & char_mask) << char_shift;
1657 char_shift += asc ? 8 : 16; 1592 char_shift += asc ? 8 : 16;
1658 } 1593 }
1659 return found_useful_op; 1594 return found_useful_op;
1660 } 1595 }
1661 1596
1662
1663 bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler, 1597 bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler,
1664 Trace* bounds_check_trace, 1598 Trace* bounds_check_trace,
1665 Trace* trace, 1599 Trace* trace,
1666 bool preload_has_checked_bounds, 1600 bool preload_has_checked_bounds,
1667 BlockLabel* on_possible_success, 1601 BlockLabel* on_possible_success,
1668 QuickCheckDetails* details, 1602 QuickCheckDetails* details,
1669 bool fall_through_on_failure) { 1603 bool fall_through_on_failure) {
1670 if (details->characters() == 0) return false; 1604 if (details->characters() == 0) return false;
1671 GetQuickCheckDetails(details, compiler, 0, 1605 GetQuickCheckDetails(details, compiler, 0,
1672 trace->at_start() == Trace::FALSE_VALUE); 1606 trace->at_start() == Trace::FALSE_VALUE);
(...skipping 10 matching lines...) Expand all
1683 ASSERT(trace->cp_offset() == bounds_check_trace->cp_offset()); 1617 ASSERT(trace->cp_offset() == bounds_check_trace->cp_offset());
1684 // We are attempting to preload the minimum number of characters 1618 // We are attempting to preload the minimum number of characters
1685 // any choice would eat, so if the bounds check fails, then none of the 1619 // any choice would eat, so if the bounds check fails, then none of the
1686 // choices can succeed, so we can just immediately backtrack, rather 1620 // choices can succeed, so we can just immediately backtrack, rather
1687 // than go to the next choice. 1621 // than go to the next choice.
1688 assembler->LoadCurrentCharacter( 1622 assembler->LoadCurrentCharacter(
1689 trace->cp_offset(), bounds_check_trace->backtrack(), 1623 trace->cp_offset(), bounds_check_trace->backtrack(),
1690 !preload_has_checked_bounds, details->characters()); 1624 !preload_has_checked_bounds, details->characters());
1691 } 1625 }
1692 1626
1693
1694 bool need_mask = true; 1627 bool need_mask = true;
1695 1628
1696 if (details->characters() == 1) { 1629 if (details->characters() == 1) {
1697 // If number of characters preloaded is 1 then we used a byte or 16 bit 1630 // If number of characters preloaded is 1 then we used a byte or 16 bit
1698 // load so the value is already masked down. 1631 // load so the value is already masked down.
1699 uint32_t char_mask; 1632 uint32_t char_mask;
1700 if (compiler->one_byte()) { 1633 if (compiler->one_byte()) {
1701 char_mask = Symbols::kMaxOneCharCodeSymbol; 1634 char_mask = Symbols::kMaxOneCharCodeSymbol;
1702 } else { 1635 } else {
1703 char_mask = Utf16::kMaxCodeUnit; 1636 char_mask = Utf16::kMaxCodeUnit;
(...skipping 21 matching lines...) Expand all
1725 } else { 1658 } else {
1726 if (need_mask) { 1659 if (need_mask) {
1727 assembler->CheckNotCharacterAfterAnd(value, mask, trace->backtrack()); 1660 assembler->CheckNotCharacterAfterAnd(value, mask, trace->backtrack());
1728 } else { 1661 } else {
1729 assembler->CheckNotCharacter(value, trace->backtrack()); 1662 assembler->CheckNotCharacter(value, trace->backtrack());
1730 } 1663 }
1731 } 1664 }
1732 return true; 1665 return true;
1733 } 1666 }
1734 1667
1735
1736 // Here is the meat of GetQuickCheckDetails (see also the comment on the 1668 // Here is the meat of GetQuickCheckDetails (see also the comment on the
1737 // super-class in the .h file). 1669 // super-class in the .h file).
1738 // 1670 //
1739 // We iterate along the text object, building up for each character a 1671 // We iterate along the text object, building up for each character a
1740 // mask and value that can be used to test for a quick failure to match. 1672 // mask and value that can be used to test for a quick failure to match.
1741 // The masks and values for the positions will be combined into a single 1673 // The masks and values for the positions will be combined into a single
1742 // machine word for the current character width in order to be used in 1674 // machine word for the current character width in order to be used in
1743 // generating a quick check. 1675 // generating a quick check.
1744 void TextNode::GetQuickCheckDetails(QuickCheckDetails* details, 1676 void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
1745 RegExpCompiler* compiler, 1677 RegExpCompiler* compiler,
(...skipping 143 matching lines...) Expand 10 before | Expand all | Expand 10 after
1889 } 1821 }
1890 } 1822 }
1891 } 1823 }
1892 ASSERT(characters_filled_in != details->characters()); 1824 ASSERT(characters_filled_in != details->characters());
1893 if (!details->cannot_match()) { 1825 if (!details->cannot_match()) {
1894 on_success()->GetQuickCheckDetails(details, compiler, characters_filled_in, 1826 on_success()->GetQuickCheckDetails(details, compiler, characters_filled_in,
1895 true); 1827 true);
1896 } 1828 }
1897 } 1829 }
1898 1830
1899
1900 void QuickCheckDetails::Clear() { 1831 void QuickCheckDetails::Clear() {
1901 for (int i = 0; i < characters_; i++) { 1832 for (int i = 0; i < characters_; i++) {
1902 positions_[i].mask = 0; 1833 positions_[i].mask = 0;
1903 positions_[i].value = 0; 1834 positions_[i].value = 0;
1904 positions_[i].determines_perfectly = false; 1835 positions_[i].determines_perfectly = false;
1905 } 1836 }
1906 characters_ = 0; 1837 characters_ = 0;
1907 } 1838 }
1908 1839
1909
1910 void QuickCheckDetails::Advance(intptr_t by, bool one_byte) { 1840 void QuickCheckDetails::Advance(intptr_t by, bool one_byte) {
1911 ASSERT(by >= 0); 1841 ASSERT(by >= 0);
1912 if (by >= characters_) { 1842 if (by >= characters_) {
1913 Clear(); 1843 Clear();
1914 return; 1844 return;
1915 } 1845 }
1916 for (intptr_t i = 0; i < characters_ - by; i++) { 1846 for (intptr_t i = 0; i < characters_ - by; i++) {
1917 positions_[i] = positions_[by + i]; 1847 positions_[i] = positions_[by + i];
1918 } 1848 }
1919 for (intptr_t i = characters_ - by; i < characters_; i++) { 1849 for (intptr_t i = characters_ - by; i < characters_; i++) {
1920 positions_[i].mask = 0; 1850 positions_[i].mask = 0;
1921 positions_[i].value = 0; 1851 positions_[i].value = 0;
1922 positions_[i].determines_perfectly = false; 1852 positions_[i].determines_perfectly = false;
1923 } 1853 }
1924 characters_ -= by; 1854 characters_ -= by;
1925 // We could change mask_ and value_ here but we would never advance unless 1855 // We could change mask_ and value_ here but we would never advance unless
1926 // they had already been used in a check and they won't be used again because 1856 // they had already been used in a check and they won't be used again because
1927 // it would gain us nothing. So there's no point. 1857 // it would gain us nothing. So there's no point.
1928 } 1858 }
1929 1859
1930
1931 void QuickCheckDetails::Merge(QuickCheckDetails* other, intptr_t from_index) { 1860 void QuickCheckDetails::Merge(QuickCheckDetails* other, intptr_t from_index) {
1932 ASSERT(characters_ == other->characters_); 1861 ASSERT(characters_ == other->characters_);
1933 if (other->cannot_match_) { 1862 if (other->cannot_match_) {
1934 return; 1863 return;
1935 } 1864 }
1936 if (cannot_match_) { 1865 if (cannot_match_) {
1937 *this = *other; 1866 *this = *other;
1938 return; 1867 return;
1939 } 1868 }
1940 for (intptr_t i = from_index; i < characters_; i++) { 1869 for (intptr_t i = from_index; i < characters_; i++) {
1941 QuickCheckDetails::Position* pos = positions(i); 1870 QuickCheckDetails::Position* pos = positions(i);
1942 QuickCheckDetails::Position* other_pos = other->positions(i); 1871 QuickCheckDetails::Position* other_pos = other->positions(i);
1943 if (pos->mask != other_pos->mask || pos->value != other_pos->value || 1872 if (pos->mask != other_pos->mask || pos->value != other_pos->value ||
1944 !other_pos->determines_perfectly) { 1873 !other_pos->determines_perfectly) {
1945 // Our mask-compare operation will be approximate unless we have the 1874 // Our mask-compare operation will be approximate unless we have the
1946 // exact same operation on both sides of the alternation. 1875 // exact same operation on both sides of the alternation.
1947 pos->determines_perfectly = false; 1876 pos->determines_perfectly = false;
1948 } 1877 }
1949 pos->mask &= other_pos->mask; 1878 pos->mask &= other_pos->mask;
1950 pos->value &= pos->mask; 1879 pos->value &= pos->mask;
1951 other_pos->value &= pos->mask; 1880 other_pos->value &= pos->mask;
1952 uint16_t differing_bits = (pos->value ^ other_pos->value); 1881 uint16_t differing_bits = (pos->value ^ other_pos->value);
1953 pos->mask &= ~differing_bits; 1882 pos->mask &= ~differing_bits;
1954 pos->value &= pos->mask; 1883 pos->value &= pos->mask;
1955 } 1884 }
1956 } 1885 }
1957 1886
1958
1959 class VisitMarker : public ValueObject { 1887 class VisitMarker : public ValueObject {
1960 public: 1888 public:
1961 explicit VisitMarker(NodeInfo* info) : info_(info) { 1889 explicit VisitMarker(NodeInfo* info) : info_(info) {
1962 ASSERT(!info->visited); 1890 ASSERT(!info->visited);
1963 info->visited = true; 1891 info->visited = true;
1964 } 1892 }
1965 ~VisitMarker() { info_->visited = false; } 1893 ~VisitMarker() { info_->visited = false; }
1966 1894
1967 private: 1895 private:
1968 NodeInfo* info_; 1896 NodeInfo* info_;
1969 }; 1897 };
1970 1898
1971
1972 RegExpNode* SeqRegExpNode::FilterOneByte(intptr_t depth, bool ignore_case) { 1899 RegExpNode* SeqRegExpNode::FilterOneByte(intptr_t depth, bool ignore_case) {
1973 if (info()->replacement_calculated) return replacement(); 1900 if (info()->replacement_calculated) return replacement();
1974 if (depth < 0) return this; 1901 if (depth < 0) return this;
1975 ASSERT(!info()->visited); 1902 ASSERT(!info()->visited);
1976 VisitMarker marker(info()); 1903 VisitMarker marker(info());
1977 return FilterSuccessor(depth - 1, ignore_case); 1904 return FilterSuccessor(depth - 1, ignore_case);
1978 } 1905 }
1979 1906
1980
1981 RegExpNode* SeqRegExpNode::FilterSuccessor(intptr_t depth, bool ignore_case) { 1907 RegExpNode* SeqRegExpNode::FilterSuccessor(intptr_t depth, bool ignore_case) {
1982 RegExpNode* next = on_success_->FilterOneByte(depth - 1, ignore_case); 1908 RegExpNode* next = on_success_->FilterOneByte(depth - 1, ignore_case);
1983 if (next == NULL) return set_replacement(NULL); 1909 if (next == NULL) return set_replacement(NULL);
1984 on_success_ = next; 1910 on_success_ = next;
1985 return set_replacement(this); 1911 return set_replacement(this);
1986 } 1912 }
1987 1913
1988
1989 // We need to check for the following characters: 0x39c 0x3bc 0x178. 1914 // We need to check for the following characters: 0x39c 0x3bc 0x178.
1990 static inline bool RangeContainsLatin1Equivalents(CharacterRange range) { 1915 static inline bool RangeContainsLatin1Equivalents(CharacterRange range) {
1991 // TODO(dcarney): this could be a lot more efficient. 1916 // TODO(dcarney): this could be a lot more efficient.
1992 return range.Contains(0x39c) || range.Contains(0x3bc) || 1917 return range.Contains(0x39c) || range.Contains(0x3bc) ||
1993 range.Contains(0x178); 1918 range.Contains(0x178);
1994 } 1919 }
1995 1920
1996
1997 static bool RangesContainLatin1Equivalents( 1921 static bool RangesContainLatin1Equivalents(
1998 ZoneGrowableArray<CharacterRange>* ranges) { 1922 ZoneGrowableArray<CharacterRange>* ranges) {
1999 for (intptr_t i = 0; i < ranges->length(); i++) { 1923 for (intptr_t i = 0; i < ranges->length(); i++) {
2000 // TODO(dcarney): this could be a lot more efficient. 1924 // TODO(dcarney): this could be a lot more efficient.
2001 if (RangeContainsLatin1Equivalents(ranges->At(i))) return true; 1925 if (RangeContainsLatin1Equivalents(ranges->At(i))) return true;
2002 } 1926 }
2003 return false; 1927 return false;
2004 } 1928 }
2005 1929
2006
2007 static uint16_t ConvertNonLatin1ToLatin1(uint16_t c) { 1930 static uint16_t ConvertNonLatin1ToLatin1(uint16_t c) {
2008 ASSERT(c > Symbols::kMaxOneCharCodeSymbol); 1931 ASSERT(c > Symbols::kMaxOneCharCodeSymbol);
2009 switch (c) { 1932 switch (c) {
2010 // This are equivalent characters in unicode. 1933 // This are equivalent characters in unicode.
2011 case 0x39c: 1934 case 0x39c:
2012 case 0x3bc: 1935 case 0x3bc:
2013 return 0xb5; 1936 return 0xb5;
2014 // This is an uppercase of a Latin-1 character 1937 // This is an uppercase of a Latin-1 character
2015 // outside of Latin-1. 1938 // outside of Latin-1.
2016 case 0x178: 1939 case 0x178:
2017 return 0xff; 1940 return 0xff;
2018 } 1941 }
2019 return 0; 1942 return 0;
2020 } 1943 }
2021 1944
2022
2023 RegExpNode* TextNode::FilterOneByte(intptr_t depth, bool ignore_case) { 1945 RegExpNode* TextNode::FilterOneByte(intptr_t depth, bool ignore_case) {
2024 if (info()->replacement_calculated) return replacement(); 1946 if (info()->replacement_calculated) return replacement();
2025 if (depth < 0) return this; 1947 if (depth < 0) return this;
2026 ASSERT(!info()->visited); 1948 ASSERT(!info()->visited);
2027 VisitMarker marker(info()); 1949 VisitMarker marker(info());
2028 intptr_t element_count = elms_->length(); 1950 intptr_t element_count = elms_->length();
2029 for (intptr_t i = 0; i < element_count; i++) { 1951 for (intptr_t i = 0; i < element_count; i++) {
2030 TextElement elm = elms_->At(i); 1952 TextElement elm = elms_->At(i);
2031 if (elm.text_type() == TextElement::ATOM) { 1953 if (elm.text_type() == TextElement::ATOM) {
2032 ZoneGrowableArray<uint16_t>* quarks = elm.atom()->data(); 1954 ZoneGrowableArray<uint16_t>* quarks = elm.atom()->data();
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after
2064 // This will be handled in a later filter. 1986 // This will be handled in a later filter.
2065 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue; 1987 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue;
2066 return set_replacement(NULL); 1988 return set_replacement(NULL);
2067 } 1989 }
2068 } 1990 }
2069 } 1991 }
2070 } 1992 }
2071 return FilterSuccessor(depth - 1, ignore_case); 1993 return FilterSuccessor(depth - 1, ignore_case);
2072 } 1994 }
2073 1995
2074
2075 RegExpNode* LoopChoiceNode::FilterOneByte(intptr_t depth, bool ignore_case) { 1996 RegExpNode* LoopChoiceNode::FilterOneByte(intptr_t depth, bool ignore_case) {
2076 if (info()->replacement_calculated) return replacement(); 1997 if (info()->replacement_calculated) return replacement();
2077 if (depth < 0) return this; 1998 if (depth < 0) return this;
2078 if (info()->visited) return this; 1999 if (info()->visited) return this;
2079 { 2000 {
2080 VisitMarker marker(info()); 2001 VisitMarker marker(info());
2081 2002
2082 RegExpNode* continue_replacement = 2003 RegExpNode* continue_replacement =
2083 continue_node_->FilterOneByte(depth - 1, ignore_case); 2004 continue_node_->FilterOneByte(depth - 1, ignore_case);
2084 // If we can't continue after the loop then there is no sense in doing the 2005 // If we can't continue after the loop then there is no sense in doing the
2085 // loop. 2006 // loop.
2086 if (continue_replacement == NULL) return set_replacement(NULL); 2007 if (continue_replacement == NULL) return set_replacement(NULL);
2087 } 2008 }
2088 2009
2089 return ChoiceNode::FilterOneByte(depth - 1, ignore_case); 2010 return ChoiceNode::FilterOneByte(depth - 1, ignore_case);
2090 } 2011 }
2091 2012
2092
2093 RegExpNode* ChoiceNode::FilterOneByte(intptr_t depth, bool ignore_case) { 2013 RegExpNode* ChoiceNode::FilterOneByte(intptr_t depth, bool ignore_case) {
2094 if (info()->replacement_calculated) return replacement(); 2014 if (info()->replacement_calculated) return replacement();
2095 if (depth < 0) return this; 2015 if (depth < 0) return this;
2096 if (info()->visited) return this; 2016 if (info()->visited) return this;
2097 VisitMarker marker(info()); 2017 VisitMarker marker(info());
2098 intptr_t choice_count = alternatives_->length(); 2018 intptr_t choice_count = alternatives_->length();
2099 2019
2100 for (intptr_t i = 0; i < choice_count; i++) { 2020 for (intptr_t i = 0; i < choice_count; i++) {
2101 GuardedAlternative alternative = alternatives_->At(i); 2021 GuardedAlternative alternative = alternatives_->At(i);
2102 if (alternative.guards() != NULL && alternative.guards()->length() != 0) { 2022 if (alternative.guards() != NULL && alternative.guards()->length() != 0) {
(...skipping 30 matching lines...) Expand all
2133 (*alternatives_)[i].node()->FilterOneByte(depth - 1, ignore_case); 2053 (*alternatives_)[i].node()->FilterOneByte(depth - 1, ignore_case);
2134 if (replacement != NULL) { 2054 if (replacement != NULL) {
2135 (*alternatives_)[i].set_node(replacement); 2055 (*alternatives_)[i].set_node(replacement);
2136 new_alternatives->Add((*alternatives_)[i]); 2056 new_alternatives->Add((*alternatives_)[i]);
2137 } 2057 }
2138 } 2058 }
2139 alternatives_ = new_alternatives; 2059 alternatives_ = new_alternatives;
2140 return this; 2060 return this;
2141 } 2061 }
2142 2062
2143
2144 RegExpNode* NegativeLookaheadChoiceNode::FilterOneByte(intptr_t depth, 2063 RegExpNode* NegativeLookaheadChoiceNode::FilterOneByte(intptr_t depth,
2145 bool ignore_case) { 2064 bool ignore_case) {
2146 if (info()->replacement_calculated) return replacement(); 2065 if (info()->replacement_calculated) return replacement();
2147 if (depth < 0) return this; 2066 if (depth < 0) return this;
2148 if (info()->visited) return this; 2067 if (info()->visited) return this;
2149 VisitMarker marker(info()); 2068 VisitMarker marker(info());
2150 // Alternative 0 is the negative lookahead, alternative 1 is what comes 2069 // Alternative 0 is the negative lookahead, alternative 1 is what comes
2151 // afterwards. 2070 // afterwards.
2152 RegExpNode* node = (*alternatives_)[1].node(); 2071 RegExpNode* node = (*alternatives_)[1].node();
2153 RegExpNode* replacement = node->FilterOneByte(depth - 1, ignore_case); 2072 RegExpNode* replacement = node->FilterOneByte(depth - 1, ignore_case);
2154 if (replacement == NULL) return set_replacement(NULL); 2073 if (replacement == NULL) return set_replacement(NULL);
2155 (*alternatives_)[1].set_node(replacement); 2074 (*alternatives_)[1].set_node(replacement);
2156 2075
2157 RegExpNode* neg_node = (*alternatives_)[0].node(); 2076 RegExpNode* neg_node = (*alternatives_)[0].node();
2158 RegExpNode* neg_replacement = neg_node->FilterOneByte(depth - 1, ignore_case); 2077 RegExpNode* neg_replacement = neg_node->FilterOneByte(depth - 1, ignore_case);
2159 // If the negative lookahead is always going to fail then 2078 // If the negative lookahead is always going to fail then
2160 // we don't need to check it. 2079 // we don't need to check it.
2161 if (neg_replacement == NULL) return set_replacement(replacement); 2080 if (neg_replacement == NULL) return set_replacement(replacement);
2162 (*alternatives_)[0].set_node(neg_replacement); 2081 (*alternatives_)[0].set_node(neg_replacement);
2163 return set_replacement(this); 2082 return set_replacement(this);
2164 } 2083 }
2165 2084
2166
2167 void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, 2085 void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
2168 RegExpCompiler* compiler, 2086 RegExpCompiler* compiler,
2169 intptr_t characters_filled_in, 2087 intptr_t characters_filled_in,
2170 bool not_at_start) { 2088 bool not_at_start) {
2171 if (body_can_be_zero_length_ || info()->visited) return; 2089 if (body_can_be_zero_length_ || info()->visited) return;
2172 VisitMarker marker(info()); 2090 VisitMarker marker(info());
2173 return ChoiceNode::GetQuickCheckDetails(details, compiler, 2091 return ChoiceNode::GetQuickCheckDetails(details, compiler,
2174 characters_filled_in, not_at_start); 2092 characters_filled_in, not_at_start);
2175 } 2093 }
2176 2094
2177
2178 void LoopChoiceNode::FillInBMInfo(intptr_t offset, 2095 void LoopChoiceNode::FillInBMInfo(intptr_t offset,
2179 intptr_t budget, 2096 intptr_t budget,
2180 BoyerMooreLookahead* bm, 2097 BoyerMooreLookahead* bm,
2181 bool not_at_start) { 2098 bool not_at_start) {
2182 if (body_can_be_zero_length_ || budget <= 0) { 2099 if (body_can_be_zero_length_ || budget <= 0) {
2183 bm->SetRest(offset); 2100 bm->SetRest(offset);
2184 SaveBMInfo(bm, not_at_start, offset); 2101 SaveBMInfo(bm, not_at_start, offset);
2185 return; 2102 return;
2186 } 2103 }
2187 ChoiceNode::FillInBMInfo(offset, budget - 1, bm, not_at_start); 2104 ChoiceNode::FillInBMInfo(offset, budget - 1, bm, not_at_start);
2188 SaveBMInfo(bm, not_at_start, offset); 2105 SaveBMInfo(bm, not_at_start, offset);
2189 } 2106 }
2190 2107
2191
2192 void ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, 2108 void ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
2193 RegExpCompiler* compiler, 2109 RegExpCompiler* compiler,
2194 intptr_t characters_filled_in, 2110 intptr_t characters_filled_in,
2195 bool not_at_start) { 2111 bool not_at_start) {
2196 not_at_start = (not_at_start || not_at_start_); 2112 not_at_start = (not_at_start || not_at_start_);
2197 intptr_t choice_count = alternatives_->length(); 2113 intptr_t choice_count = alternatives_->length();
2198 ASSERT(choice_count > 0); 2114 ASSERT(choice_count > 0);
2199 (*alternatives_)[0].node()->GetQuickCheckDetails( 2115 (*alternatives_)[0].node()->GetQuickCheckDetails(
2200 details, compiler, characters_filled_in, not_at_start); 2116 details, compiler, characters_filled_in, not_at_start);
2201 for (intptr_t i = 1; i < choice_count; i++) { 2117 for (intptr_t i = 1; i < choice_count; i++) {
2202 QuickCheckDetails new_details(details->characters()); 2118 QuickCheckDetails new_details(details->characters());
2203 RegExpNode* node = (*alternatives_)[i].node(); 2119 RegExpNode* node = (*alternatives_)[i].node();
2204 node->GetQuickCheckDetails(&new_details, compiler, characters_filled_in, 2120 node->GetQuickCheckDetails(&new_details, compiler, characters_filled_in,
2205 not_at_start); 2121 not_at_start);
2206 // Here we merge the quick match details of the two branches. 2122 // Here we merge the quick match details of the two branches.
2207 details->Merge(&new_details, characters_filled_in); 2123 details->Merge(&new_details, characters_filled_in);
2208 } 2124 }
2209 } 2125 }
2210 2126
2211
2212 // Check for [0-9A-Z_a-z]. 2127 // Check for [0-9A-Z_a-z].
2213 static void EmitWordCheck(RegExpMacroAssembler* assembler, 2128 static void EmitWordCheck(RegExpMacroAssembler* assembler,
2214 BlockLabel* word, 2129 BlockLabel* word,
2215 BlockLabel* non_word, 2130 BlockLabel* non_word,
2216 bool fall_through_on_word) { 2131 bool fall_through_on_word) {
2217 if (assembler->CheckSpecialCharacterClass( 2132 if (assembler->CheckSpecialCharacterClass(
2218 fall_through_on_word ? 'w' : 'W', 2133 fall_through_on_word ? 'w' : 'W',
2219 fall_through_on_word ? non_word : word)) { 2134 fall_through_on_word ? non_word : word)) {
2220 // Optimized implementation available. 2135 // Optimized implementation available.
2221 return; 2136 return;
2222 } 2137 }
2223 assembler->CheckCharacterGT('z', non_word); 2138 assembler->CheckCharacterGT('z', non_word);
2224 assembler->CheckCharacterLT('0', non_word); 2139 assembler->CheckCharacterLT('0', non_word);
2225 assembler->CheckCharacterGT('a' - 1, word); 2140 assembler->CheckCharacterGT('a' - 1, word);
2226 assembler->CheckCharacterLT('9' + 1, word); 2141 assembler->CheckCharacterLT('9' + 1, word);
2227 assembler->CheckCharacterLT('A', non_word); 2142 assembler->CheckCharacterLT('A', non_word);
2228 assembler->CheckCharacterLT('Z' + 1, word); 2143 assembler->CheckCharacterLT('Z' + 1, word);
2229 if (fall_through_on_word) { 2144 if (fall_through_on_word) {
2230 assembler->CheckNotCharacter('_', non_word); 2145 assembler->CheckNotCharacter('_', non_word);
2231 } else { 2146 } else {
2232 assembler->CheckCharacter('_', word); 2147 assembler->CheckCharacter('_', word);
2233 } 2148 }
2234 } 2149 }
2235 2150
2236
2237 // Emit the code to check for a ^ in multiline mode (1-character lookbehind 2151 // Emit the code to check for a ^ in multiline mode (1-character lookbehind
2238 // that matches newline or the start of input). 2152 // that matches newline or the start of input).
2239 static void EmitHat(RegExpCompiler* compiler, 2153 static void EmitHat(RegExpCompiler* compiler,
2240 RegExpNode* on_success, 2154 RegExpNode* on_success,
2241 Trace* trace) { 2155 Trace* trace) {
2242 RegExpMacroAssembler* assembler = compiler->macro_assembler(); 2156 RegExpMacroAssembler* assembler = compiler->macro_assembler();
2243 // We will be loading the previous character into the current character 2157 // We will be loading the previous character into the current character
2244 // register. 2158 // register.
2245 Trace new_trace(*trace); 2159 Trace new_trace(*trace);
2246 new_trace.InvalidateCurrentCharacter(); 2160 new_trace.InvalidateCurrentCharacter();
(...skipping 13 matching lines...) Expand all
2260 if (!compiler->one_byte()) { 2174 if (!compiler->one_byte()) {
2261 assembler->CheckCharacterAfterAnd(0x2028, 0xfffe, &ok); 2175 assembler->CheckCharacterAfterAnd(0x2028, 0xfffe, &ok);
2262 } 2176 }
2263 assembler->CheckCharacter('\n', &ok); 2177 assembler->CheckCharacter('\n', &ok);
2264 assembler->CheckNotCharacter('\r', new_trace.backtrack()); 2178 assembler->CheckNotCharacter('\r', new_trace.backtrack());
2265 } 2179 }
2266 assembler->BindBlock(&ok); 2180 assembler->BindBlock(&ok);
2267 on_success->Emit(compiler, &new_trace); 2181 on_success->Emit(compiler, &new_trace);
2268 } 2182 }
2269 2183
2270
2271 // Emit the code to handle \b and \B (word-boundary or non-word-boundary). 2184 // Emit the code to handle \b and \B (word-boundary or non-word-boundary).
2272 void AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) { 2185 void AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) {
2273 RegExpMacroAssembler* assembler = compiler->macro_assembler(); 2186 RegExpMacroAssembler* assembler = compiler->macro_assembler();
2274 Trace::TriBool next_is_word_character = Trace::UNKNOWN; 2187 Trace::TriBool next_is_word_character = Trace::UNKNOWN;
2275 bool not_at_start = (trace->at_start() == Trace::FALSE_VALUE); 2188 bool not_at_start = (trace->at_start() == Trace::FALSE_VALUE);
2276 BoyerMooreLookahead* lookahead = bm_info(not_at_start); 2189 BoyerMooreLookahead* lookahead = bm_info(not_at_start);
2277 if (lookahead == NULL) { 2190 if (lookahead == NULL) {
2278 intptr_t eats_at_least = 2191 intptr_t eats_at_least =
2279 Utils::Minimum(kMaxLookaheadForBoyerMoore, 2192 Utils::Minimum(kMaxLookaheadForBoyerMoore,
2280 EatsAtLeast(kMaxLookaheadForBoyerMoore, kRecursionBudget, 2193 EatsAtLeast(kMaxLookaheadForBoyerMoore, kRecursionBudget,
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
2315 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord); 2228 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord);
2316 assembler->BindBlock(&ok); 2229 assembler->BindBlock(&ok);
2317 } else if (next_is_word_character == Trace::TRUE_VALUE) { 2230 } else if (next_is_word_character == Trace::TRUE_VALUE) {
2318 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord); 2231 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord);
2319 } else { 2232 } else {
2320 ASSERT(next_is_word_character == Trace::FALSE_VALUE); 2233 ASSERT(next_is_word_character == Trace::FALSE_VALUE);
2321 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord); 2234 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord);
2322 } 2235 }
2323 } 2236 }
2324 2237
2325
2326 void AssertionNode::BacktrackIfPrevious( 2238 void AssertionNode::BacktrackIfPrevious(
2327 RegExpCompiler* compiler, 2239 RegExpCompiler* compiler,
2328 Trace* trace, 2240 Trace* trace,
2329 AssertionNode::IfPrevious backtrack_if_previous) { 2241 AssertionNode::IfPrevious backtrack_if_previous) {
2330 RegExpMacroAssembler* assembler = compiler->macro_assembler(); 2242 RegExpMacroAssembler* assembler = compiler->macro_assembler();
2331 Trace new_trace(*trace); 2243 Trace new_trace(*trace);
2332 new_trace.InvalidateCurrentCharacter(); 2244 new_trace.InvalidateCurrentCharacter();
2333 2245
2334 BlockLabel fall_through, dummy; 2246 BlockLabel fall_through, dummy;
2335 2247
(...skipping 11 matching lines...) Expand all
2347 } 2259 }
2348 // We already checked that we are not at the start of input so it must be 2260 // We already checked that we are not at the start of input so it must be
2349 // OK to load the previous character. 2261 // OK to load the previous character.
2350 assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, &dummy, false); 2262 assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, &dummy, false);
2351 EmitWordCheck(assembler, word, non_word, backtrack_if_previous == kIsNonWord); 2263 EmitWordCheck(assembler, word, non_word, backtrack_if_previous == kIsNonWord);
2352 2264
2353 assembler->BindBlock(&fall_through); 2265 assembler->BindBlock(&fall_through);
2354 on_success()->Emit(compiler, &new_trace); 2266 on_success()->Emit(compiler, &new_trace);
2355 } 2267 }
2356 2268
2357
2358 void AssertionNode::GetQuickCheckDetails(QuickCheckDetails* details, 2269 void AssertionNode::GetQuickCheckDetails(QuickCheckDetails* details,
2359 RegExpCompiler* compiler, 2270 RegExpCompiler* compiler,
2360 intptr_t filled_in, 2271 intptr_t filled_in,
2361 bool not_at_start) { 2272 bool not_at_start) {
2362 if (assertion_type_ == AT_START && not_at_start) { 2273 if (assertion_type_ == AT_START && not_at_start) {
2363 details->set_cannot_match(); 2274 details->set_cannot_match();
2364 return; 2275 return;
2365 } 2276 }
2366 return on_success()->GetQuickCheckDetails(details, compiler, filled_in, 2277 return on_success()->GetQuickCheckDetails(details, compiler, filled_in,
2367 not_at_start); 2278 not_at_start);
2368 } 2279 }
2369 2280
2370
2371 void AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace) { 2281 void AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace) {
2372 RegExpMacroAssembler* assembler = compiler->macro_assembler(); 2282 RegExpMacroAssembler* assembler = compiler->macro_assembler();
2373 switch (assertion_type_) { 2283 switch (assertion_type_) {
2374 case AT_END: { 2284 case AT_END: {
2375 BlockLabel ok; 2285 BlockLabel ok;
2376 assembler->CheckPosition(trace->cp_offset(), &ok); 2286 assembler->CheckPosition(trace->cp_offset(), &ok);
2377 assembler->GoTo(trace->backtrack()); 2287 assembler->GoTo(trace->backtrack());
2378 assembler->BindBlock(&ok); 2288 assembler->BindBlock(&ok);
2379 break; 2289 break;
2380 } 2290 }
(...skipping 15 matching lines...) Expand all
2396 return; 2306 return;
2397 case AT_BOUNDARY: 2307 case AT_BOUNDARY:
2398 case AT_NON_BOUNDARY: { 2308 case AT_NON_BOUNDARY: {
2399 EmitBoundaryCheck(compiler, trace); 2309 EmitBoundaryCheck(compiler, trace);
2400 return; 2310 return;
2401 } 2311 }
2402 } 2312 }
2403 on_success()->Emit(compiler, trace); 2313 on_success()->Emit(compiler, trace);
2404 } 2314 }
2405 2315
2406
2407 static bool DeterminedAlready(QuickCheckDetails* quick_check, intptr_t offset) { 2316 static bool DeterminedAlready(QuickCheckDetails* quick_check, intptr_t offset) {
2408 if (quick_check == NULL) return false; 2317 if (quick_check == NULL) return false;
2409 if (offset >= quick_check->characters()) return false; 2318 if (offset >= quick_check->characters()) return false;
2410 return quick_check->positions(offset)->determines_perfectly; 2319 return quick_check->positions(offset)->determines_perfectly;
2411 } 2320 }
2412 2321
2413
2414 static void UpdateBoundsCheck(intptr_t index, intptr_t* checked_up_to) { 2322 static void UpdateBoundsCheck(intptr_t index, intptr_t* checked_up_to) {
2415 if (index > *checked_up_to) { 2323 if (index > *checked_up_to) {
2416 *checked_up_to = index; 2324 *checked_up_to = index;
2417 } 2325 }
2418 } 2326 }
2419 2327
2420
2421 // We call this repeatedly to generate code for each pass over the text node. 2328 // We call this repeatedly to generate code for each pass over the text node.
2422 // The passes are in increasing order of difficulty because we hope one 2329 // The passes are in increasing order of difficulty because we hope one
2423 // of the first passes will fail in which case we are saved the work of the 2330 // of the first passes will fail in which case we are saved the work of the
2424 // later passes. for example for the case independent regexp /%[asdfghjkl]a/ 2331 // later passes. for example for the case independent regexp /%[asdfghjkl]a/
2425 // we will check the '%' in the first pass, the case independent 'a' in the 2332 // we will check the '%' in the first pass, the case independent 'a' in the
2426 // second pass and the character class in the last pass. 2333 // second pass and the character class in the last pass.
2427 // 2334 //
2428 // The passes are done from right to left, so for example to test for /bar/ 2335 // The passes are done from right to left, so for example to test for /bar/
2429 // we will first test for an 'r' with offset 2, then an 'a' with offset 1 2336 // we will first test for an 'r' with offset 2, then an 'a' with offset 1
2430 // and then a 'b' with offset 0. This means we can avoid the end-of-input 2337 // and then a 'b' with offset 0. This means we can avoid the end-of-input
(...skipping 70 matching lines...) Expand 10 before | Expand all | Expand 10 after
2501 if (DeterminedAlready(quick_check, elm.cp_offset())) continue; 2408 if (DeterminedAlready(quick_check, elm.cp_offset())) continue;
2502 RegExpCharacterClass* cc = elm.char_class(); 2409 RegExpCharacterClass* cc = elm.char_class();
2503 EmitCharClass(assembler, cc, one_byte, backtrack, cp_offset, 2410 EmitCharClass(assembler, cc, one_byte, backtrack, cp_offset,
2504 *checked_up_to < cp_offset, preloaded, Z); 2411 *checked_up_to < cp_offset, preloaded, Z);
2505 UpdateBoundsCheck(cp_offset, checked_up_to); 2412 UpdateBoundsCheck(cp_offset, checked_up_to);
2506 } 2413 }
2507 } 2414 }
2508 } 2415 }
2509 } 2416 }
2510 2417
2511
2512 intptr_t TextNode::Length() { 2418 intptr_t TextNode::Length() {
2513 TextElement elm = elms_->Last(); 2419 TextElement elm = elms_->Last();
2514 ASSERT(elm.cp_offset() >= 0); 2420 ASSERT(elm.cp_offset() >= 0);
2515 return elm.cp_offset() + elm.length(); 2421 return elm.cp_offset() + elm.length();
2516 } 2422 }
2517 2423
2518
2519 bool TextNode::SkipPass(intptr_t intptr_t_pass, bool ignore_case) { 2424 bool TextNode::SkipPass(intptr_t intptr_t_pass, bool ignore_case) {
2520 TextEmitPassType pass = static_cast<TextEmitPassType>(intptr_t_pass); 2425 TextEmitPassType pass = static_cast<TextEmitPassType>(intptr_t_pass);
2521 if (ignore_case) { 2426 if (ignore_case) {
2522 return pass == SIMPLE_CHARACTER_MATCH; 2427 return pass == SIMPLE_CHARACTER_MATCH;
2523 } else { 2428 } else {
2524 return pass == NON_LETTER_CHARACTER_MATCH || pass == CASE_CHARACTER_MATCH; 2429 return pass == NON_LETTER_CHARACTER_MATCH || pass == CASE_CHARACTER_MATCH;
2525 } 2430 }
2526 } 2431 }
2527 2432
2528
2529 // This generates the code to match a text node. A text node can contain 2433 // This generates the code to match a text node. A text node can contain
2530 // straight character sequences (possibly to be matched in a case-independent 2434 // straight character sequences (possibly to be matched in a case-independent
2531 // way) and character classes. For efficiency we do not do this in a single 2435 // way) and character classes. For efficiency we do not do this in a single
2532 // pass from left to right. Instead we pass over the text node several times, 2436 // pass from left to right. Instead we pass over the text node several times,
2533 // emitting code for some character positions every time. See the comment on 2437 // emitting code for some character positions every time. See the comment on
2534 // TextEmitPass for details. 2438 // TextEmitPass for details.
2535 void TextNode::Emit(RegExpCompiler* compiler, Trace* trace) { 2439 void TextNode::Emit(RegExpCompiler* compiler, Trace* trace) {
2536 LimitResult limit_result = LimitVersions(compiler, trace); 2440 LimitResult limit_result = LimitVersions(compiler, trace);
2537 if (limit_result == DONE) return; 2441 if (limit_result == DONE) return;
2538 ASSERT(limit_result == CONTINUE); 2442 ASSERT(limit_result == CONTINUE);
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after
2570 } 2474 }
2571 } 2475 }
2572 2476
2573 Trace successor_trace(*trace); 2477 Trace successor_trace(*trace);
2574 successor_trace.set_at_start(false); 2478 successor_trace.set_at_start(false);
2575 successor_trace.AdvanceCurrentPositionInTrace(Length(), compiler); 2479 successor_trace.AdvanceCurrentPositionInTrace(Length(), compiler);
2576 RecursionCheck rc(compiler); 2480 RecursionCheck rc(compiler);
2577 on_success()->Emit(compiler, &successor_trace); 2481 on_success()->Emit(compiler, &successor_trace);
2578 } 2482 }
2579 2483
2580
2581 void Trace::InvalidateCurrentCharacter() { 2484 void Trace::InvalidateCurrentCharacter() {
2582 characters_preloaded_ = 0; 2485 characters_preloaded_ = 0;
2583 } 2486 }
2584 2487
2585
2586 void Trace::AdvanceCurrentPositionInTrace(intptr_t by, 2488 void Trace::AdvanceCurrentPositionInTrace(intptr_t by,
2587 RegExpCompiler* compiler) { 2489 RegExpCompiler* compiler) {
2588 ASSERT(by > 0); 2490 ASSERT(by > 0);
2589 // We don't have an instruction for shifting the current character register 2491 // We don't have an instruction for shifting the current character register
2590 // down or for using a shifted value for anything so lets just forget that 2492 // down or for using a shifted value for anything so lets just forget that
2591 // we preloaded any characters into it. 2493 // we preloaded any characters into it.
2592 characters_preloaded_ = 0; 2494 characters_preloaded_ = 0;
2593 // Adjust the offsets of the quick check performed information. This 2495 // Adjust the offsets of the quick check performed information. This
2594 // information is used to find out what we already determined about the 2496 // information is used to find out what we already determined about the
2595 // characters by means of mask and compare. 2497 // characters by means of mask and compare.
2596 quick_check_performed_.Advance(by, compiler->one_byte()); 2498 quick_check_performed_.Advance(by, compiler->one_byte());
2597 cp_offset_ += by; 2499 cp_offset_ += by;
2598 if (cp_offset_ > RegExpMacroAssembler::kMaxCPOffset) { 2500 if (cp_offset_ > RegExpMacroAssembler::kMaxCPOffset) {
2599 compiler->SetRegExpTooBig(); 2501 compiler->SetRegExpTooBig();
2600 cp_offset_ = 0; 2502 cp_offset_ = 0;
2601 } 2503 }
2602 bound_checked_up_to_ = 2504 bound_checked_up_to_ =
2603 Utils::Maximum(static_cast<intptr_t>(0), bound_checked_up_to_ - by); 2505 Utils::Maximum(static_cast<intptr_t>(0), bound_checked_up_to_ - by);
2604 } 2506 }
2605 2507
2606
2607 void TextNode::MakeCaseIndependent(bool is_one_byte) { 2508 void TextNode::MakeCaseIndependent(bool is_one_byte) {
2608 intptr_t element_count = elms_->length(); 2509 intptr_t element_count = elms_->length();
2609 for (intptr_t i = 0; i < element_count; i++) { 2510 for (intptr_t i = 0; i < element_count; i++) {
2610 TextElement elm = elms_->At(i); 2511 TextElement elm = elms_->At(i);
2611 if (elm.text_type() == TextElement::CHAR_CLASS) { 2512 if (elm.text_type() == TextElement::CHAR_CLASS) {
2612 RegExpCharacterClass* cc = elm.char_class(); 2513 RegExpCharacterClass* cc = elm.char_class();
2613 // None of the standard character classes is different in the case 2514 // None of the standard character classes is different in the case
2614 // independent case and it slows us down if we don't know that. 2515 // independent case and it slows us down if we don't know that.
2615 if (cc->is_standard()) continue; 2516 if (cc->is_standard()) continue;
2616 ZoneGrowableArray<CharacterRange>* ranges = cc->ranges(); 2517 ZoneGrowableArray<CharacterRange>* ranges = cc->ranges();
2617 intptr_t range_count = ranges->length(); 2518 intptr_t range_count = ranges->length();
2618 for (intptr_t j = 0; j < range_count; j++) { 2519 for (intptr_t j = 0; j < range_count; j++) {
2619 (*ranges)[j].AddCaseEquivalents(ranges, is_one_byte, Z); 2520 (*ranges)[j].AddCaseEquivalents(ranges, is_one_byte, Z);
2620 } 2521 }
2621 } 2522 }
2622 } 2523 }
2623 } 2524 }
2624 2525
2625
2626 intptr_t TextNode::GreedyLoopTextLength() { 2526 intptr_t TextNode::GreedyLoopTextLength() {
2627 TextElement elm = elms_->At(elms_->length() - 1); 2527 TextElement elm = elms_->At(elms_->length() - 1);
2628 return elm.cp_offset() + elm.length(); 2528 return elm.cp_offset() + elm.length();
2629 } 2529 }
2630 2530
2631
2632 RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode( 2531 RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode(
2633 RegExpCompiler* compiler) { 2532 RegExpCompiler* compiler) {
2634 if (elms_->length() != 1) return NULL; 2533 if (elms_->length() != 1) return NULL;
2635 TextElement elm = elms_->At(0); 2534 TextElement elm = elms_->At(0);
2636 if (elm.text_type() != TextElement::CHAR_CLASS) return NULL; 2535 if (elm.text_type() != TextElement::CHAR_CLASS) return NULL;
2637 RegExpCharacterClass* node = elm.char_class(); 2536 RegExpCharacterClass* node = elm.char_class();
2638 ZoneGrowableArray<CharacterRange>* ranges = node->ranges(); 2537 ZoneGrowableArray<CharacterRange>* ranges = node->ranges();
2639 if (!CharacterRange::IsCanonical(ranges)) { 2538 if (!CharacterRange::IsCanonical(ranges)) {
2640 CharacterRange::Canonicalize(ranges); 2539 CharacterRange::Canonicalize(ranges);
2641 } 2540 }
2642 if (node->is_negated()) { 2541 if (node->is_negated()) {
2643 return ranges->length() == 0 ? on_success() : NULL; 2542 return ranges->length() == 0 ? on_success() : NULL;
2644 } 2543 }
2645 if (ranges->length() != 1) return NULL; 2544 if (ranges->length() != 1) return NULL;
2646 uint32_t max_char; 2545 uint32_t max_char;
2647 if (compiler->one_byte()) { 2546 if (compiler->one_byte()) {
2648 max_char = Symbols::kMaxOneCharCodeSymbol; 2547 max_char = Symbols::kMaxOneCharCodeSymbol;
2649 } else { 2548 } else {
2650 max_char = Utf16::kMaxCodeUnit; 2549 max_char = Utf16::kMaxCodeUnit;
2651 } 2550 }
2652 return ranges->At(0).IsEverything(max_char) ? on_success() : NULL; 2551 return ranges->At(0).IsEverything(max_char) ? on_success() : NULL;
2653 } 2552 }
2654 2553
2655
2656 // Finds the fixed match length of a sequence of nodes that goes from 2554 // Finds the fixed match length of a sequence of nodes that goes from
2657 // this alternative and back to this choice node. If there are variable 2555 // this alternative and back to this choice node. If there are variable
2658 // length nodes or other complications in the way then return a sentinel 2556 // length nodes or other complications in the way then return a sentinel
2659 // value indicating that a greedy loop cannot be constructed. 2557 // value indicating that a greedy loop cannot be constructed.
2660 intptr_t ChoiceNode::GreedyLoopTextLengthForAlternative( 2558 intptr_t ChoiceNode::GreedyLoopTextLengthForAlternative(
2661 GuardedAlternative* alternative) { 2559 GuardedAlternative* alternative) {
2662 intptr_t length = 0; 2560 intptr_t length = 0;
2663 RegExpNode* node = alternative->node(); 2561 RegExpNode* node = alternative->node();
2664 // Later we will generate code for all these text nodes using recursion 2562 // Later we will generate code for all these text nodes using recursion
2665 // so we have to limit the max number. 2563 // so we have to limit the max number.
2666 intptr_t recursion_depth = 0; 2564 intptr_t recursion_depth = 0;
2667 while (node != this) { 2565 while (node != this) {
2668 if (recursion_depth++ > RegExpCompiler::kMaxRecursion) { 2566 if (recursion_depth++ > RegExpCompiler::kMaxRecursion) {
2669 return kNodeIsTooComplexForGreedyLoops; 2567 return kNodeIsTooComplexForGreedyLoops;
2670 } 2568 }
2671 intptr_t node_length = node->GreedyLoopTextLength(); 2569 intptr_t node_length = node->GreedyLoopTextLength();
2672 if (node_length == kNodeIsTooComplexForGreedyLoops) { 2570 if (node_length == kNodeIsTooComplexForGreedyLoops) {
2673 return kNodeIsTooComplexForGreedyLoops; 2571 return kNodeIsTooComplexForGreedyLoops;
2674 } 2572 }
2675 length += node_length; 2573 length += node_length;
2676 SeqRegExpNode* seq_node = static_cast<SeqRegExpNode*>(node); 2574 SeqRegExpNode* seq_node = static_cast<SeqRegExpNode*>(node);
2677 node = seq_node->on_success(); 2575 node = seq_node->on_success();
2678 } 2576 }
2679 return length; 2577 return length;
2680 } 2578 }
2681 2579
2682
2683 void LoopChoiceNode::AddLoopAlternative(GuardedAlternative alt) { 2580 void LoopChoiceNode::AddLoopAlternative(GuardedAlternative alt) {
2684 ASSERT(loop_node_ == NULL); 2581 ASSERT(loop_node_ == NULL);
2685 AddAlternative(alt); 2582 AddAlternative(alt);
2686 loop_node_ = alt.node(); 2583 loop_node_ = alt.node();
2687 } 2584 }
2688 2585
2689
2690 void LoopChoiceNode::AddContinueAlternative(GuardedAlternative alt) { 2586 void LoopChoiceNode::AddContinueAlternative(GuardedAlternative alt) {
2691 ASSERT(continue_node_ == NULL); 2587 ASSERT(continue_node_ == NULL);
2692 AddAlternative(alt); 2588 AddAlternative(alt);
2693 continue_node_ = alt.node(); 2589 continue_node_ = alt.node();
2694 } 2590 }
2695 2591
2696
2697 void LoopChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) { 2592 void LoopChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
2698 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); 2593 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
2699 if (trace->stop_node() == this) { 2594 if (trace->stop_node() == this) {
2700 // Back edge of greedy optimized loop node graph. 2595 // Back edge of greedy optimized loop node graph.
2701 intptr_t text_length = 2596 intptr_t text_length =
2702 GreedyLoopTextLengthForAlternative(&((*alternatives_)[0])); 2597 GreedyLoopTextLengthForAlternative(&((*alternatives_)[0]));
2703 ASSERT(text_length != kNodeIsTooComplexForGreedyLoops); 2598 ASSERT(text_length != kNodeIsTooComplexForGreedyLoops);
2704 // Update the counter-based backtracking info on the stack. This is an 2599 // Update the counter-based backtracking info on the stack. This is an
2705 // optimization for greedy loops (see below). 2600 // optimization for greedy loops (see below).
2706 ASSERT(trace->cp_offset() == text_length); 2601 ASSERT(trace->cp_offset() == text_length);
2707 macro_assembler->AdvanceCurrentPosition(text_length); 2602 macro_assembler->AdvanceCurrentPosition(text_length);
2708 macro_assembler->GoTo(trace->loop_label()); 2603 macro_assembler->GoTo(trace->loop_label());
2709 return; 2604 return;
2710 } 2605 }
2711 ASSERT(trace->stop_node() == NULL); 2606 ASSERT(trace->stop_node() == NULL);
2712 if (!trace->is_trivial()) { 2607 if (!trace->is_trivial()) {
2713 trace->Flush(compiler, this); 2608 trace->Flush(compiler, this);
2714 return; 2609 return;
2715 } 2610 }
2716 ChoiceNode::Emit(compiler, trace); 2611 ChoiceNode::Emit(compiler, trace);
2717 } 2612 }
2718 2613
2719
2720 intptr_t ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler, 2614 intptr_t ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler,
2721 intptr_t eats_at_least) { 2615 intptr_t eats_at_least) {
2722 intptr_t preload_characters = 2616 intptr_t preload_characters =
2723 Utils::Minimum(static_cast<intptr_t>(4), eats_at_least); 2617 Utils::Minimum(static_cast<intptr_t>(4), eats_at_least);
2724 if (compiler->macro_assembler()->CanReadUnaligned()) { 2618 if (compiler->macro_assembler()->CanReadUnaligned()) {
2725 bool one_byte = compiler->one_byte(); 2619 bool one_byte = compiler->one_byte();
2726 if (one_byte) { 2620 if (one_byte) {
2727 if (preload_characters > 4) preload_characters = 4; 2621 if (preload_characters > 4) preload_characters = 4;
2728 // We can't preload 3 characters because there is no machine instruction 2622 // We can't preload 3 characters because there is no machine instruction
2729 // to do that. We can't just load 4 because we could be reading 2623 // to do that. We can't just load 4 because we could be reading
2730 // beyond the end of the string, which could cause a memory fault. 2624 // beyond the end of the string, which could cause a memory fault.
2731 if (preload_characters == 3) preload_characters = 2; 2625 if (preload_characters == 3) preload_characters = 2;
2732 } else { 2626 } else {
2733 if (preload_characters > 2) preload_characters = 2; 2627 if (preload_characters > 2) preload_characters = 2;
2734 } 2628 }
2735 } else { 2629 } else {
2736 if (preload_characters > 1) preload_characters = 1; 2630 if (preload_characters > 1) preload_characters = 1;
2737 } 2631 }
2738 return preload_characters; 2632 return preload_characters;
2739 } 2633 }
2740 2634
2741
2742 // This structure is used when generating the alternatives in a choice node. It 2635 // This structure is used when generating the alternatives in a choice node. It
2743 // records the way the alternative is being code generated. 2636 // records the way the alternative is being code generated.
2744 struct AlternativeGeneration { 2637 struct AlternativeGeneration {
2745 AlternativeGeneration() 2638 AlternativeGeneration()
2746 : possible_success(), 2639 : possible_success(),
2747 expects_preload(false), 2640 expects_preload(false),
2748 after(), 2641 after(),
2749 quick_check_details() {} 2642 quick_check_details() {}
2750 BlockLabel possible_success; 2643 BlockLabel possible_success;
2751 bool expects_preload; 2644 bool expects_preload;
2752 BlockLabel after; 2645 BlockLabel after;
2753 QuickCheckDetails quick_check_details; 2646 QuickCheckDetails quick_check_details;
2754 }; 2647 };
2755 2648
2756
2757 // Creates a list of AlternativeGenerations. If the list has a reasonable 2649 // Creates a list of AlternativeGenerations. If the list has a reasonable
2758 // size then it is on the stack, otherwise the excess is on the heap. 2650 // size then it is on the stack, otherwise the excess is on the heap.
2759 class AlternativeGenerationList { 2651 class AlternativeGenerationList {
2760 public: 2652 public:
2761 explicit AlternativeGenerationList(intptr_t count) : alt_gens_(count) { 2653 explicit AlternativeGenerationList(intptr_t count) : alt_gens_(count) {
2762 for (intptr_t i = 0; i < count && i < kAFew; i++) { 2654 for (intptr_t i = 0; i < count && i < kAFew; i++) {
2763 alt_gens_.Add(a_few_alt_gens_ + i); 2655 alt_gens_.Add(a_few_alt_gens_ + i);
2764 } 2656 }
2765 for (intptr_t i = kAFew; i < count; i++) { 2657 for (intptr_t i = kAFew; i < count; i++) {
2766 alt_gens_.Add(new AlternativeGeneration()); 2658 alt_gens_.Add(new AlternativeGeneration());
2767 } 2659 }
2768 } 2660 }
2769 ~AlternativeGenerationList() { 2661 ~AlternativeGenerationList() {
2770 for (intptr_t i = kAFew; i < alt_gens_.length(); i++) { 2662 for (intptr_t i = kAFew; i < alt_gens_.length(); i++) {
2771 delete alt_gens_[i]; 2663 delete alt_gens_[i];
2772 alt_gens_[i] = NULL; 2664 alt_gens_[i] = NULL;
2773 } 2665 }
2774 } 2666 }
2775 2667
2776 AlternativeGeneration* at(intptr_t i) { return alt_gens_[i]; } 2668 AlternativeGeneration* at(intptr_t i) { return alt_gens_[i]; }
2777 2669
2778 private: 2670 private:
2779 static const intptr_t kAFew = 10; 2671 static const intptr_t kAFew = 10;
2780 GrowableArray<AlternativeGeneration*> alt_gens_; 2672 GrowableArray<AlternativeGeneration*> alt_gens_;
2781 AlternativeGeneration a_few_alt_gens_[kAFew]; 2673 AlternativeGeneration a_few_alt_gens_[kAFew];
2782 2674
2783 DISALLOW_ALLOCATION(); 2675 DISALLOW_ALLOCATION();
2784 }; 2676 };
2785 2677
2786
2787 // The '2' variant is inclusive from and exclusive to. 2678 // The '2' variant is inclusive from and exclusive to.
2788 // This covers \s as defined in ECMA-262 5.1, 15.10.2.12, 2679 // This covers \s as defined in ECMA-262 5.1, 15.10.2.12,
2789 // which include WhiteSpace (7.2) or LineTerminator (7.3) values. 2680 // which include WhiteSpace (7.2) or LineTerminator (7.3) values.
2790 static const intptr_t kSpaceRanges[] = { 2681 static const intptr_t kSpaceRanges[] = {
2791 '\t', '\r' + 1, ' ', ' ' + 1, 0x00A0, 0x00A1, 0x1680, 0x1681, 2682 '\t', '\r' + 1, ' ', ' ' + 1, 0x00A0, 0x00A1, 0x1680, 0x1681,
2792 0x180E, 0x180F, 0x2000, 0x200B, 0x2028, 0x202A, 0x202F, 0x2030, 2683 0x180E, 0x180F, 0x2000, 0x200B, 0x2028, 0x202A, 0x202F, 0x2030,
2793 0x205F, 0x2060, 0x3000, 0x3001, 0xFEFF, 0xFF00, 0x10000}; 2684 0x205F, 0x2060, 0x3000, 0x3001, 0xFEFF, 0xFF00, 0x10000};
2794 static const intptr_t kSpaceRangeCount = ARRAY_SIZE(kSpaceRanges); 2685 static const intptr_t kSpaceRangeCount = ARRAY_SIZE(kSpaceRanges);
2795 static const intptr_t kWordRanges[] = {'0', '9' + 1, 'A', 'Z' + 1, '_', 2686 static const intptr_t kWordRanges[] = {'0', '9' + 1, 'A', 'Z' + 1, '_',
2796 '_' + 1, 'a', 'z' + 1, 0x10000}; 2687 '_' + 1, 'a', 'z' + 1, 0x10000};
2797 static const intptr_t kWordRangeCount = ARRAY_SIZE(kWordRanges); 2688 static const intptr_t kWordRangeCount = ARRAY_SIZE(kWordRanges);
2798 static const intptr_t kDigitRanges[] = {'0', '9' + 1, 0x10000}; 2689 static const intptr_t kDigitRanges[] = {'0', '9' + 1, 0x10000};
2799 static const intptr_t kDigitRangeCount = ARRAY_SIZE(kDigitRanges); 2690 static const intptr_t kDigitRangeCount = ARRAY_SIZE(kDigitRanges);
2800 static const intptr_t kSurrogateRanges[] = {0xd800, 0xe000, 0x10000}; 2691 static const intptr_t kSurrogateRanges[] = {0xd800, 0xe000, 0x10000};
2801 static const intptr_t kSurrogateRangeCount = ARRAY_SIZE(kSurrogateRanges); 2692 static const intptr_t kSurrogateRangeCount = ARRAY_SIZE(kSurrogateRanges);
2802 static const intptr_t kLineTerminatorRanges[] = {0x000A, 0x000B, 0x000D, 0x000E, 2693 static const intptr_t kLineTerminatorRanges[] = {0x000A, 0x000B, 0x000D, 0x000E,
2803 0x2028, 0x202A, 0x10000}; 2694 0x2028, 0x202A, 0x10000};
2804 static const intptr_t kLineTerminatorRangeCount = 2695 static const intptr_t kLineTerminatorRangeCount =
2805 ARRAY_SIZE(kLineTerminatorRanges); 2696 ARRAY_SIZE(kLineTerminatorRanges);
2806 2697
2807
2808 void BoyerMoorePositionInfo::Set(intptr_t character) { 2698 void BoyerMoorePositionInfo::Set(intptr_t character) {
2809 SetInterval(Interval(character, character)); 2699 SetInterval(Interval(character, character));
2810 } 2700 }
2811 2701
2812
2813 void BoyerMoorePositionInfo::SetInterval(const Interval& interval) { 2702 void BoyerMoorePositionInfo::SetInterval(const Interval& interval) {
2814 s_ = AddRange(s_, kSpaceRanges, kSpaceRangeCount, interval); 2703 s_ = AddRange(s_, kSpaceRanges, kSpaceRangeCount, interval);
2815 w_ = AddRange(w_, kWordRanges, kWordRangeCount, interval); 2704 w_ = AddRange(w_, kWordRanges, kWordRangeCount, interval);
2816 d_ = AddRange(d_, kDigitRanges, kDigitRangeCount, interval); 2705 d_ = AddRange(d_, kDigitRanges, kDigitRangeCount, interval);
2817 surrogate_ = 2706 surrogate_ =
2818 AddRange(surrogate_, kSurrogateRanges, kSurrogateRangeCount, interval); 2707 AddRange(surrogate_, kSurrogateRanges, kSurrogateRangeCount, interval);
2819 if (interval.to() - interval.from() >= kMapSize - 1) { 2708 if (interval.to() - interval.from() >= kMapSize - 1) {
2820 if (map_count_ != kMapSize) { 2709 if (map_count_ != kMapSize) {
2821 map_count_ = kMapSize; 2710 map_count_ = kMapSize;
2822 for (intptr_t i = 0; i < kMapSize; i++) 2711 for (intptr_t i = 0; i < kMapSize; i++)
2823 (*map_)[i] = true; 2712 (*map_)[i] = true;
2824 } 2713 }
2825 return; 2714 return;
2826 } 2715 }
2827 for (intptr_t i = interval.from(); i <= interval.to(); i++) { 2716 for (intptr_t i = interval.from(); i <= interval.to(); i++) {
2828 intptr_t mod_character = (i & kMask); 2717 intptr_t mod_character = (i & kMask);
2829 if (!map_->At(mod_character)) { 2718 if (!map_->At(mod_character)) {
2830 map_count_++; 2719 map_count_++;
2831 (*map_)[mod_character] = true; 2720 (*map_)[mod_character] = true;
2832 } 2721 }
2833 if (map_count_ == kMapSize) return; 2722 if (map_count_ == kMapSize) return;
2834 } 2723 }
2835 } 2724 }
2836 2725
2837
2838 void BoyerMoorePositionInfo::SetAll() { 2726 void BoyerMoorePositionInfo::SetAll() {
2839 s_ = w_ = d_ = kLatticeUnknown; 2727 s_ = w_ = d_ = kLatticeUnknown;
2840 if (map_count_ != kMapSize) { 2728 if (map_count_ != kMapSize) {
2841 map_count_ = kMapSize; 2729 map_count_ = kMapSize;
2842 for (intptr_t i = 0; i < kMapSize; i++) 2730 for (intptr_t i = 0; i < kMapSize; i++)
2843 (*map_)[i] = true; 2731 (*map_)[i] = true;
2844 } 2732 }
2845 } 2733 }
2846 2734
2847
2848 BoyerMooreLookahead::BoyerMooreLookahead(intptr_t length, 2735 BoyerMooreLookahead::BoyerMooreLookahead(intptr_t length,
2849 RegExpCompiler* compiler, 2736 RegExpCompiler* compiler,
2850 Zone* zone) 2737 Zone* zone)
2851 : length_(length), compiler_(compiler) { 2738 : length_(length), compiler_(compiler) {
2852 if (compiler->one_byte()) { 2739 if (compiler->one_byte()) {
2853 max_char_ = Symbols::kMaxOneCharCodeSymbol; 2740 max_char_ = Symbols::kMaxOneCharCodeSymbol;
2854 } else { 2741 } else {
2855 max_char_ = Utf16::kMaxCodeUnit; 2742 max_char_ = Utf16::kMaxCodeUnit;
2856 } 2743 }
2857 bitmaps_ = new (zone) ZoneGrowableArray<BoyerMoorePositionInfo*>(length); 2744 bitmaps_ = new (zone) ZoneGrowableArray<BoyerMoorePositionInfo*>(length);
2858 for (intptr_t i = 0; i < length; i++) { 2745 for (intptr_t i = 0; i < length; i++) {
2859 bitmaps_->Add(new (zone) BoyerMoorePositionInfo(zone)); 2746 bitmaps_->Add(new (zone) BoyerMoorePositionInfo(zone));
2860 } 2747 }
2861 } 2748 }
2862 2749
2863
2864 // Find the longest range of lookahead that has the fewest number of different 2750 // Find the longest range of lookahead that has the fewest number of different
2865 // characters that can occur at a given position. Since we are optimizing two 2751 // characters that can occur at a given position. Since we are optimizing two
2866 // different parameters at once this is a tradeoff. 2752 // different parameters at once this is a tradeoff.
2867 bool BoyerMooreLookahead::FindWorthwhileInterval(intptr_t* from, intptr_t* to) { 2753 bool BoyerMooreLookahead::FindWorthwhileInterval(intptr_t* from, intptr_t* to) {
2868 intptr_t biggest_points = 0; 2754 intptr_t biggest_points = 0;
2869 // If more than 32 characters out of 128 can occur it is unlikely that we can 2755 // If more than 32 characters out of 128 can occur it is unlikely that we can
2870 // be lucky enough to step forwards much of the time. 2756 // be lucky enough to step forwards much of the time.
2871 const intptr_t kMaxMax = 32; 2757 const intptr_t kMaxMax = 32;
2872 for (intptr_t max_number_of_chars = 4; max_number_of_chars < kMaxMax; 2758 for (intptr_t max_number_of_chars = 4; max_number_of_chars < kMaxMax;
2873 max_number_of_chars *= 2) { 2759 max_number_of_chars *= 2) {
2874 biggest_points = 2760 biggest_points =
2875 FindBestInterval(max_number_of_chars, biggest_points, from, to); 2761 FindBestInterval(max_number_of_chars, biggest_points, from, to);
2876 } 2762 }
2877 if (biggest_points == 0) return false; 2763 if (biggest_points == 0) return false;
2878 return true; 2764 return true;
2879 } 2765 }
2880 2766
2881
2882 // Find the highest-points range between 0 and length_ where the character 2767 // Find the highest-points range between 0 and length_ where the character
2883 // information is not too vague. 'Too vague' means that there are more than 2768 // information is not too vague. 'Too vague' means that there are more than
2884 // max_number_of_chars that can occur at this position. Calculates the number 2769 // max_number_of_chars that can occur at this position. Calculates the number
2885 // of points as the product of width-of-the-range and 2770 // of points as the product of width-of-the-range and
2886 // probability-of-finding-one-of-the-characters, where the probability is 2771 // probability-of-finding-one-of-the-characters, where the probability is
2887 // calculated using the frequency distribution of the sample subject string. 2772 // calculated using the frequency distribution of the sample subject string.
2888 intptr_t BoyerMooreLookahead::FindBestInterval(intptr_t max_number_of_chars, 2773 intptr_t BoyerMooreLookahead::FindBestInterval(intptr_t max_number_of_chars,
2889 intptr_t old_biggest_points, 2774 intptr_t old_biggest_points,
2890 intptr_t* from, 2775 intptr_t* from,
2891 intptr_t* to) { 2776 intptr_t* to) {
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after
2931 intptr_t points = (i - remembered_from) * probability; 2816 intptr_t points = (i - remembered_from) * probability;
2932 if (points > biggest_points) { 2817 if (points > biggest_points) {
2933 *from = remembered_from; 2818 *from = remembered_from;
2934 *to = i - 1; 2819 *to = i - 1;
2935 biggest_points = points; 2820 biggest_points = points;
2936 } 2821 }
2937 } 2822 }
2938 return biggest_points; 2823 return biggest_points;
2939 } 2824 }
2940 2825
2941
2942 // Take all the characters that will not prevent a successful match if they 2826 // Take all the characters that will not prevent a successful match if they
2943 // occur in the subject string in the range between min_lookahead and 2827 // occur in the subject string in the range between min_lookahead and
2944 // max_lookahead (inclusive) measured from the current position. If the 2828 // max_lookahead (inclusive) measured from the current position. If the
2945 // character at max_lookahead offset is not one of these characters, then we 2829 // character at max_lookahead offset is not one of these characters, then we
2946 // can safely skip forwards by the number of characters in the range. 2830 // can safely skip forwards by the number of characters in the range.
2947 intptr_t BoyerMooreLookahead::GetSkipTable( 2831 intptr_t BoyerMooreLookahead::GetSkipTable(
2948 intptr_t min_lookahead, 2832 intptr_t min_lookahead,
2949 intptr_t max_lookahead, 2833 intptr_t max_lookahead,
2950 const TypedData& boolean_skip_table) { 2834 const TypedData& boolean_skip_table) {
2951 const intptr_t kSize = RegExpMacroAssembler::kTableSize; 2835 const intptr_t kSize = RegExpMacroAssembler::kTableSize;
(...skipping 11 matching lines...) Expand all
2963 for (intptr_t j = 0; j < kSize; j++) { 2847 for (intptr_t j = 0; j < kSize; j++) {
2964 if (map->at(j)) { 2848 if (map->at(j)) {
2965 boolean_skip_table.SetUint8(j, kDontSkipArrayEntry); 2849 boolean_skip_table.SetUint8(j, kDontSkipArrayEntry);
2966 } 2850 }
2967 } 2851 }
2968 } 2852 }
2969 2853
2970 return skip; 2854 return skip;
2971 } 2855 }
2972 2856
2973
2974 // See comment above on the implementation of GetSkipTable. 2857 // See comment above on the implementation of GetSkipTable.
2975 void BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) { 2858 void BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) {
2976 const intptr_t kSize = RegExpMacroAssembler::kTableSize; 2859 const intptr_t kSize = RegExpMacroAssembler::kTableSize;
2977 2860
2978 intptr_t min_lookahead = 0; 2861 intptr_t min_lookahead = 0;
2979 intptr_t max_lookahead = 0; 2862 intptr_t max_lookahead = 0;
2980 2863
2981 if (!FindWorthwhileInterval(&min_lookahead, &max_lookahead)) return; 2864 if (!FindWorthwhileInterval(&min_lookahead, &max_lookahead)) return;
2982 2865
2983 bool found_single_character = false; 2866 bool found_single_character = false;
(...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after
3034 masm->CheckPreemption(/*is_backtrack=*/false); 2917 masm->CheckPreemption(/*is_backtrack=*/false);
3035 masm->LoadCurrentCharacter(max_lookahead, &cont, true); 2918 masm->LoadCurrentCharacter(max_lookahead, &cont, true);
3036 masm->CheckBitInTable(boolean_skip_table, &cont); 2919 masm->CheckBitInTable(boolean_skip_table, &cont);
3037 masm->AdvanceCurrentPosition(skip_distance); 2920 masm->AdvanceCurrentPosition(skip_distance);
3038 masm->GoTo(&again); 2921 masm->GoTo(&again);
3039 masm->BindBlock(&cont); 2922 masm->BindBlock(&cont);
3040 2923
3041 return; 2924 return;
3042 } 2925 }
3043 2926
3044
3045 /* Code generation for choice nodes. 2927 /* Code generation for choice nodes.
3046 * 2928 *
3047 * We generate quick checks that do a mask and compare to eliminate a 2929 * We generate quick checks that do a mask and compare to eliminate a
3048 * choice. If the quick check succeeds then it jumps to the continuation to 2930 * choice. If the quick check succeeds then it jumps to the continuation to
3049 * do slow checks and check subsequent nodes. If it fails (the common case) 2931 * do slow checks and check subsequent nodes. If it fails (the common case)
3050 * it falls through to the next choice. 2932 * it falls through to the next choice.
3051 * 2933 *
3052 * Here is the desired flow graph. Nodes directly below each other imply 2934 * Here is the desired flow graph. Nodes directly below each other imply
3053 * fallthrough. Alternatives 1 and 2 have quick checks. Alternative 2935 * fallthrough. Alternatives 1 and 2 have quick checks. Alternative
3054 * 3 doesn't have a quick check so we have to call the slow check. 2936 * 3 doesn't have a quick check so we have to call the slow check.
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after
3114 * S| / 2996 * S| /
3115 * V F / 2997 * V F /
3116 * S2--/ 2998 * S2--/
3117 */ 2999 */
3118 3000
3119 GreedyLoopState::GreedyLoopState(bool not_at_start) { 3001 GreedyLoopState::GreedyLoopState(bool not_at_start) {
3120 counter_backtrack_trace_.set_backtrack(&label_); 3002 counter_backtrack_trace_.set_backtrack(&label_);
3121 if (not_at_start) counter_backtrack_trace_.set_at_start(false); 3003 if (not_at_start) counter_backtrack_trace_.set_at_start(false);
3122 } 3004 }
3123 3005
3124
3125 void ChoiceNode::AssertGuardsMentionRegisters(Trace* trace) { 3006 void ChoiceNode::AssertGuardsMentionRegisters(Trace* trace) {
3126 #ifdef DEBUG 3007 #ifdef DEBUG
3127 intptr_t choice_count = alternatives_->length(); 3008 intptr_t choice_count = alternatives_->length();
3128 for (intptr_t i = 0; i < choice_count - 1; i++) { 3009 for (intptr_t i = 0; i < choice_count - 1; i++) {
3129 GuardedAlternative alternative = alternatives_->At(i); 3010 GuardedAlternative alternative = alternatives_->At(i);
3130 ZoneGrowableArray<Guard*>* guards = alternative.guards(); 3011 ZoneGrowableArray<Guard*>* guards = alternative.guards();
3131 intptr_t guard_count = (guards == NULL) ? 0 : guards->length(); 3012 intptr_t guard_count = (guards == NULL) ? 0 : guards->length();
3132 for (intptr_t j = 0; j < guard_count; j++) { 3013 for (intptr_t j = 0; j < guard_count; j++) {
3133 ASSERT(!trace->mentions_reg(guards->At(j)->reg())); 3014 ASSERT(!trace->mentions_reg(guards->At(j)->reg()));
3134 } 3015 }
3135 } 3016 }
3136 #endif 3017 #endif
3137 } 3018 }
3138 3019
3139
3140 void ChoiceNode::SetUpPreLoad(RegExpCompiler* compiler, 3020 void ChoiceNode::SetUpPreLoad(RegExpCompiler* compiler,
3141 Trace* current_trace, 3021 Trace* current_trace,
3142 PreloadState* state) { 3022 PreloadState* state) {
3143 if (state->eats_at_least_ == PreloadState::kEatsAtLeastNotYetInitialized) { 3023 if (state->eats_at_least_ == PreloadState::kEatsAtLeastNotYetInitialized) {
3144 // Save some time by looking at most one machine word ahead. 3024 // Save some time by looking at most one machine word ahead.
3145 state->eats_at_least_ = 3025 state->eats_at_least_ =
3146 EatsAtLeast(compiler->one_byte() ? 4 : 2, kRecursionBudget, 3026 EatsAtLeast(compiler->one_byte() ? 4 : 2, kRecursionBudget,
3147 current_trace->at_start() == Trace::FALSE_VALUE); 3027 current_trace->at_start() == Trace::FALSE_VALUE);
3148 } 3028 }
3149 state->preload_characters_ = 3029 state->preload_characters_ =
3150 CalculatePreloadCharacters(compiler, state->eats_at_least_); 3030 CalculatePreloadCharacters(compiler, state->eats_at_least_);
3151 3031
3152 state->preload_is_current_ = 3032 state->preload_is_current_ =
3153 (current_trace->characters_preloaded() == state->preload_characters_); 3033 (current_trace->characters_preloaded() == state->preload_characters_);
3154 state->preload_has_checked_bounds_ = state->preload_is_current_; 3034 state->preload_has_checked_bounds_ = state->preload_is_current_;
3155 } 3035 }
3156 3036
3157
3158 void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) { 3037 void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
3159 intptr_t choice_count = alternatives_->length(); 3038 intptr_t choice_count = alternatives_->length();
3160 3039
3161 AssertGuardsMentionRegisters(trace); 3040 AssertGuardsMentionRegisters(trace);
3162 3041
3163 LimitResult limit_result = LimitVersions(compiler, trace); 3042 LimitResult limit_result = LimitVersions(compiler, trace);
3164 if (limit_result == DONE) return; 3043 if (limit_result == DONE) return;
3165 ASSERT(limit_result == CONTINUE); 3044 ASSERT(limit_result == CONTINUE);
3166 3045
3167 // For loop nodes we already flushed (see LoopChoiceNode::Emit), but for 3046 // For loop nodes we already flushed (see LoopChoiceNode::Emit), but for
(...skipping 85 matching lines...) Expand 10 before | Expand all | Expand 10 after
3253 3132
3254 macro_assembler->BindBlock(greedy_loop_state->label()); 3133 macro_assembler->BindBlock(greedy_loop_state->label());
3255 // If we have unwound to the bottom then backtrack. 3134 // If we have unwound to the bottom then backtrack.
3256 macro_assembler->CheckGreedyLoop(trace->backtrack()); 3135 macro_assembler->CheckGreedyLoop(trace->backtrack());
3257 // Otherwise try the second priority at an earlier position. 3136 // Otherwise try the second priority at an earlier position.
3258 macro_assembler->AdvanceCurrentPosition(-text_length); 3137 macro_assembler->AdvanceCurrentPosition(-text_length);
3259 macro_assembler->GoTo(&second_choice); 3138 macro_assembler->GoTo(&second_choice);
3260 return new_trace; 3139 return new_trace;
3261 } 3140 }
3262 3141
3263
3264 intptr_t ChoiceNode::EmitOptimizedUnanchoredSearch(RegExpCompiler* compiler, 3142 intptr_t ChoiceNode::EmitOptimizedUnanchoredSearch(RegExpCompiler* compiler,
3265 Trace* trace) { 3143 Trace* trace) {
3266 intptr_t eats_at_least = PreloadState::kEatsAtLeastNotYetInitialized; 3144 intptr_t eats_at_least = PreloadState::kEatsAtLeastNotYetInitialized;
3267 if (alternatives_->length() != 2) return eats_at_least; 3145 if (alternatives_->length() != 2) return eats_at_least;
3268 3146
3269 GuardedAlternative alt1 = alternatives_->At(1); 3147 GuardedAlternative alt1 = alternatives_->At(1);
3270 if (alt1.guards() != NULL && alt1.guards()->length() != 0) { 3148 if (alt1.guards() != NULL && alt1.guards()->length() != 0) {
3271 return eats_at_least; 3149 return eats_at_least;
3272 } 3150 }
3273 RegExpNode* eats_anything_node = alt1.node(); 3151 RegExpNode* eats_anything_node = alt1.node();
(...skipping 27 matching lines...) Expand all
3301 GuardedAlternative alt0 = alternatives_->At(0); 3179 GuardedAlternative alt0 = alternatives_->At(0);
3302 alt0.node()->FillInBMInfo(0, kRecursionBudget, bm, false); 3180 alt0.node()->FillInBMInfo(0, kRecursionBudget, bm, false);
3303 } 3181 }
3304 } 3182 }
3305 if (bm != NULL) { 3183 if (bm != NULL) {
3306 bm->EmitSkipInstructions(macro_assembler); 3184 bm->EmitSkipInstructions(macro_assembler);
3307 } 3185 }
3308 return eats_at_least; 3186 return eats_at_least;
3309 } 3187 }
3310 3188
3311
3312 void ChoiceNode::EmitChoices(RegExpCompiler* compiler, 3189 void ChoiceNode::EmitChoices(RegExpCompiler* compiler,
3313 AlternativeGenerationList* alt_gens, 3190 AlternativeGenerationList* alt_gens,
3314 intptr_t first_choice, 3191 intptr_t first_choice,
3315 Trace* trace, 3192 Trace* trace,
3316 PreloadState* preload) { 3193 PreloadState* preload) {
3317 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); 3194 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
3318 SetUpPreLoad(compiler, trace, preload); 3195 SetUpPreLoad(compiler, trace, preload);
3319 3196
3320 // For now we just call all choices one after the other. The idea ultimately 3197 // For now we just call all choices one after the other. The idea ultimately
3321 // is to use the Dispatch table to try only the relevant ones. 3198 // is to use the Dispatch table to try only the relevant ones.
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after
3386 for (intptr_t j = 0; j < guard_count; j++) { 3263 for (intptr_t j = 0; j < guard_count; j++) {
3387 GenerateGuard(macro_assembler, guards->At(j), &new_trace); 3264 GenerateGuard(macro_assembler, guards->At(j), &new_trace);
3388 } 3265 }
3389 alternative.node()->Emit(compiler, &new_trace); 3266 alternative.node()->Emit(compiler, &new_trace);
3390 preload->preload_is_current_ = false; 3267 preload->preload_is_current_ = false;
3391 } 3268 }
3392 macro_assembler->BindBlock(&alt_gen->after); 3269 macro_assembler->BindBlock(&alt_gen->after);
3393 } 3270 }
3394 } 3271 }
3395 3272
3396
3397 void ChoiceNode::EmitOutOfLineContinuation(RegExpCompiler* compiler, 3273 void ChoiceNode::EmitOutOfLineContinuation(RegExpCompiler* compiler,
3398 Trace* trace, 3274 Trace* trace,
3399 GuardedAlternative alternative, 3275 GuardedAlternative alternative,
3400 AlternativeGeneration* alt_gen, 3276 AlternativeGeneration* alt_gen,
3401 intptr_t preload_characters, 3277 intptr_t preload_characters,
3402 bool next_expects_preload) { 3278 bool next_expects_preload) {
3403 if (!alt_gen->possible_success.IsLinked()) return; 3279 if (!alt_gen->possible_success.IsLinked()) return;
3404 3280
3405 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); 3281 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
3406 macro_assembler->BindBlock(&alt_gen->possible_success); 3282 macro_assembler->BindBlock(&alt_gen->possible_success);
(...skipping 19 matching lines...) Expand all
3426 macro_assembler->GoTo(&(alt_gen->after)); 3302 macro_assembler->GoTo(&(alt_gen->after));
3427 } else { 3303 } else {
3428 out_of_line_trace.set_backtrack(&(alt_gen->after)); 3304 out_of_line_trace.set_backtrack(&(alt_gen->after));
3429 for (intptr_t j = 0; j < guard_count; j++) { 3305 for (intptr_t j = 0; j < guard_count; j++) {
3430 GenerateGuard(macro_assembler, guards->At(j), &out_of_line_trace); 3306 GenerateGuard(macro_assembler, guards->At(j), &out_of_line_trace);
3431 } 3307 }
3432 alternative.node()->Emit(compiler, &out_of_line_trace); 3308 alternative.node()->Emit(compiler, &out_of_line_trace);
3433 } 3309 }
3434 } 3310 }
3435 3311
3436
3437 void ActionNode::Emit(RegExpCompiler* compiler, Trace* trace) { 3312 void ActionNode::Emit(RegExpCompiler* compiler, Trace* trace) {
3438 RegExpMacroAssembler* assembler = compiler->macro_assembler(); 3313 RegExpMacroAssembler* assembler = compiler->macro_assembler();
3439 LimitResult limit_result = LimitVersions(compiler, trace); 3314 LimitResult limit_result = LimitVersions(compiler, trace);
3440 if (limit_result == DONE) return; 3315 if (limit_result == DONE) return;
3441 ASSERT(limit_result == CONTINUE); 3316 ASSERT(limit_result == CONTINUE);
3442 3317
3443 RecursionCheck rc(compiler); 3318 RecursionCheck rc(compiler);
3444 3319
3445 switch (action_type_) { 3320 switch (action_type_) {
3446 case STORE_POSITION: { 3321 case STORE_POSITION: {
(...skipping 100 matching lines...) Expand 10 before | Expand all | Expand 10 after
3547 3422
3548 ASSERT(trace->backtrack() == NULL); 3423 ASSERT(trace->backtrack() == NULL);
3549 assembler->Backtrack(); 3424 assembler->Backtrack();
3550 return; 3425 return;
3551 } 3426 }
3552 default: 3427 default:
3553 UNREACHABLE(); 3428 UNREACHABLE();
3554 } 3429 }
3555 } 3430 }
3556 3431
3557
3558 void BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) { 3432 void BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
3559 RegExpMacroAssembler* assembler = compiler->macro_assembler(); 3433 RegExpMacroAssembler* assembler = compiler->macro_assembler();
3560 if (!trace->is_trivial()) { 3434 if (!trace->is_trivial()) {
3561 trace->Flush(compiler, this); 3435 trace->Flush(compiler, this);
3562 return; 3436 return;
3563 } 3437 }
3564 3438
3565 LimitResult limit_result = LimitVersions(compiler, trace); 3439 LimitResult limit_result = LimitVersions(compiler, trace);
3566 if (limit_result == DONE) return; 3440 if (limit_result == DONE) return;
3567 ASSERT(limit_result == CONTINUE); 3441 ASSERT(limit_result == CONTINUE);
3568 3442
3569 RecursionCheck rc(compiler); 3443 RecursionCheck rc(compiler);
3570 3444
3571 ASSERT(start_reg_ + 1 == end_reg_); 3445 ASSERT(start_reg_ + 1 == end_reg_);
3572 if (compiler->ignore_case()) { 3446 if (compiler->ignore_case()) {
3573 assembler->CheckNotBackReferenceIgnoreCase(start_reg_, trace->backtrack()); 3447 assembler->CheckNotBackReferenceIgnoreCase(start_reg_, trace->backtrack());
3574 } else { 3448 } else {
3575 assembler->CheckNotBackReference(start_reg_, trace->backtrack()); 3449 assembler->CheckNotBackReference(start_reg_, trace->backtrack());
3576 } 3450 }
3577 on_success()->Emit(compiler, trace); 3451 on_success()->Emit(compiler, trace);
3578 } 3452 }
3579 3453
3580
3581 // ------------------------------------------------------------------- 3454 // -------------------------------------------------------------------
3582 // Dot/dotty output 3455 // Dot/dotty output
3583 3456
3584
3585 #ifdef DEBUG 3457 #ifdef DEBUG
3586 3458
3587
3588 class DotPrinter : public NodeVisitor { 3459 class DotPrinter : public NodeVisitor {
3589 public: 3460 public:
3590 explicit DotPrinter(bool ignore_case) {} 3461 explicit DotPrinter(bool ignore_case) {}
3591 void PrintNode(const char* label, RegExpNode* node); 3462 void PrintNode(const char* label, RegExpNode* node);
3592 void Visit(RegExpNode* node); 3463 void Visit(RegExpNode* node);
3593 void PrintAttributes(RegExpNode* from); 3464 void PrintAttributes(RegExpNode* from);
3594 void PrintOnFailure(RegExpNode* from, RegExpNode* to); 3465 void PrintOnFailure(RegExpNode* from, RegExpNode* to);
3595 #define DECLARE_VISIT(Type) virtual void Visit##Type(Type##Node* that); 3466 #define DECLARE_VISIT(Type) virtual void Visit##Type(Type##Node* that);
3596 FOR_EACH_NODE_TYPE(DECLARE_VISIT) 3467 FOR_EACH_NODE_TYPE(DECLARE_VISIT)
3597 #undef DECLARE_VISIT 3468 #undef DECLARE_VISIT
3598 }; 3469 };
3599 3470
3600
3601 void DotPrinter::PrintNode(const char* label, RegExpNode* node) { 3471 void DotPrinter::PrintNode(const char* label, RegExpNode* node) {
3602 OS::Print("digraph G {\n graph [label=\""); 3472 OS::Print("digraph G {\n graph [label=\"");
3603 for (intptr_t i = 0; label[i]; i++) { 3473 for (intptr_t i = 0; label[i]; i++) {
3604 switch (label[i]) { 3474 switch (label[i]) {
3605 case '\\': 3475 case '\\':
3606 OS::Print("\\\\"); 3476 OS::Print("\\\\");
3607 break; 3477 break;
3608 case '"': 3478 case '"':
3609 OS::Print("\""); 3479 OS::Print("\"");
3610 break; 3480 break;
3611 default: 3481 default:
3612 OS::Print("%c", label[i]); 3482 OS::Print("%c", label[i]);
3613 break; 3483 break;
3614 } 3484 }
3615 } 3485 }
3616 OS::Print("\"];\n"); 3486 OS::Print("\"];\n");
3617 Visit(node); 3487 Visit(node);
3618 OS::Print("}\n"); 3488 OS::Print("}\n");
3619 } 3489 }
3620 3490
3621
3622 void DotPrinter::Visit(RegExpNode* node) { 3491 void DotPrinter::Visit(RegExpNode* node) {
3623 if (node->info()->visited) return; 3492 if (node->info()->visited) return;
3624 node->info()->visited = true; 3493 node->info()->visited = true;
3625 node->Accept(this); 3494 node->Accept(this);
3626 } 3495 }
3627 3496
3628
3629 void DotPrinter::PrintOnFailure(RegExpNode* from, RegExpNode* on_failure) { 3497 void DotPrinter::PrintOnFailure(RegExpNode* from, RegExpNode* on_failure) {
3630 OS::Print(" n%p -> n%p [style=dotted];\n", from, on_failure); 3498 OS::Print(" n%p -> n%p [style=dotted];\n", from, on_failure);
3631 Visit(on_failure); 3499 Visit(on_failure);
3632 } 3500 }
3633 3501
3634
3635 class AttributePrinter : public ValueObject { 3502 class AttributePrinter : public ValueObject {
3636 public: 3503 public:
3637 AttributePrinter() : first_(true) {} 3504 AttributePrinter() : first_(true) {}
3638 void PrintSeparator() { 3505 void PrintSeparator() {
3639 if (first_) { 3506 if (first_) {
3640 first_ = false; 3507 first_ = false;
3641 } else { 3508 } else {
3642 OS::Print("|"); 3509 OS::Print("|");
3643 } 3510 }
3644 } 3511 }
3645 void PrintBit(const char* name, bool value) { 3512 void PrintBit(const char* name, bool value) {
3646 if (!value) return; 3513 if (!value) return;
3647 PrintSeparator(); 3514 PrintSeparator();
3648 OS::Print("{%s}", name); 3515 OS::Print("{%s}", name);
3649 } 3516 }
3650 void PrintPositive(const char* name, intptr_t value) { 3517 void PrintPositive(const char* name, intptr_t value) {
3651 if (value < 0) return; 3518 if (value < 0) return;
3652 PrintSeparator(); 3519 PrintSeparator();
3653 OS::Print("{%s|%" Pd "}", name, value); 3520 OS::Print("{%s|%" Pd "}", name, value);
3654 } 3521 }
3655 3522
3656 private: 3523 private:
3657 bool first_; 3524 bool first_;
3658 }; 3525 };
3659 3526
3660
3661 void DotPrinter::PrintAttributes(RegExpNode* that) { 3527 void DotPrinter::PrintAttributes(RegExpNode* that) {
3662 OS::Print( 3528 OS::Print(
3663 " a%p [shape=Mrecord, color=grey, fontcolor=grey, " 3529 " a%p [shape=Mrecord, color=grey, fontcolor=grey, "
3664 "margin=0.1, fontsize=10, label=\"{", 3530 "margin=0.1, fontsize=10, label=\"{",
3665 that); 3531 that);
3666 AttributePrinter printer; 3532 AttributePrinter printer;
3667 NodeInfo* info = that->info(); 3533 NodeInfo* info = that->info();
3668 printer.PrintBit("NI", info->follows_newline_interest); 3534 printer.PrintBit("NI", info->follows_newline_interest);
3669 printer.PrintBit("WI", info->follows_word_interest); 3535 printer.PrintBit("WI", info->follows_word_interest);
3670 printer.PrintBit("SI", info->follows_start_interest); 3536 printer.PrintBit("SI", info->follows_start_interest);
3671 BlockLabel* label = that->label(); 3537 BlockLabel* label = that->label();
3672 if (label->IsBound()) printer.PrintPositive("@", label->Position()); 3538 if (label->IsBound()) printer.PrintPositive("@", label->Position());
3673 OS::Print( 3539 OS::Print(
3674 "}\"];\n" 3540 "}\"];\n"
3675 " a%p -> n%p [style=dashed, color=grey, arrowhead=none];\n", 3541 " a%p -> n%p [style=dashed, color=grey, arrowhead=none];\n",
3676 that, that); 3542 that, that);
3677 } 3543 }
3678 3544
3679
3680 void DotPrinter::VisitChoice(ChoiceNode* that) { 3545 void DotPrinter::VisitChoice(ChoiceNode* that) {
3681 OS::Print(" n%p [shape=Mrecord, label=\"?\"];\n", that); 3546 OS::Print(" n%p [shape=Mrecord, label=\"?\"];\n", that);
3682 for (intptr_t i = 0; i < that->alternatives()->length(); i++) { 3547 for (intptr_t i = 0; i < that->alternatives()->length(); i++) {
3683 GuardedAlternative alt = that->alternatives()->At(i); 3548 GuardedAlternative alt = that->alternatives()->At(i);
3684 OS::Print(" n%p -> n%p", that, alt.node()); 3549 OS::Print(" n%p -> n%p", that, alt.node());
3685 } 3550 }
3686 for (intptr_t i = 0; i < that->alternatives()->length(); i++) { 3551 for (intptr_t i = 0; i < that->alternatives()->length(); i++) {
3687 GuardedAlternative alt = that->alternatives()->At(i); 3552 GuardedAlternative alt = that->alternatives()->At(i);
3688 alt.node()->Accept(this); 3553 alt.node()->Accept(this);
3689 } 3554 }
3690 } 3555 }
3691 3556
3692
3693 void DotPrinter::VisitText(TextNode* that) { 3557 void DotPrinter::VisitText(TextNode* that) {
3694 OS::Print(" n%p [label=\"", that); 3558 OS::Print(" n%p [label=\"", that);
3695 for (intptr_t i = 0; i < that->elements()->length(); i++) { 3559 for (intptr_t i = 0; i < that->elements()->length(); i++) {
3696 if (i > 0) OS::Print(" "); 3560 if (i > 0) OS::Print(" ");
3697 TextElement elm = that->elements()->At(i); 3561 TextElement elm = that->elements()->At(i);
3698 switch (elm.text_type()) { 3562 switch (elm.text_type()) {
3699 case TextElement::ATOM: { 3563 case TextElement::ATOM: {
3700 ZoneGrowableArray<uint16_t>* data = elm.atom()->data(); 3564 ZoneGrowableArray<uint16_t>* data = elm.atom()->data();
3701 for (intptr_t i = 0; i < data->length(); i++) { 3565 for (intptr_t i = 0; i < data->length(); i++) {
3702 OS::Print("%c", static_cast<char>(data->At(i))); 3566 OS::Print("%c", static_cast<char>(data->At(i)));
(...skipping 16 matching lines...) Expand all
3719 default: 3583 default:
3720 UNREACHABLE(); 3584 UNREACHABLE();
3721 } 3585 }
3722 } 3586 }
3723 OS::Print("\", shape=box, peripheries=2];\n"); 3587 OS::Print("\", shape=box, peripheries=2];\n");
3724 PrintAttributes(that); 3588 PrintAttributes(that);
3725 OS::Print(" n%p -> n%p;\n", that, that->on_success()); 3589 OS::Print(" n%p -> n%p;\n", that, that->on_success());
3726 Visit(that->on_success()); 3590 Visit(that->on_success());
3727 } 3591 }
3728 3592
3729
3730 void DotPrinter::VisitBackReference(BackReferenceNode* that) { 3593 void DotPrinter::VisitBackReference(BackReferenceNode* that) {
3731 OS::Print(" n%p [label=\"$%" Pd "..$%" Pd "\", shape=doubleoctagon];\n", 3594 OS::Print(" n%p [label=\"$%" Pd "..$%" Pd "\", shape=doubleoctagon];\n",
3732 that, that->start_register(), that->end_register()); 3595 that, that->start_register(), that->end_register());
3733 PrintAttributes(that); 3596 PrintAttributes(that);
3734 OS::Print(" n%p -> n%p;\n", that, that->on_success()); 3597 OS::Print(" n%p -> n%p;\n", that, that->on_success());
3735 Visit(that->on_success()); 3598 Visit(that->on_success());
3736 } 3599 }
3737 3600
3738
3739 void DotPrinter::VisitEnd(EndNode* that) { 3601 void DotPrinter::VisitEnd(EndNode* that) {
3740 OS::Print(" n%p [style=bold, shape=point];\n", that); 3602 OS::Print(" n%p [style=bold, shape=point];\n", that);
3741 PrintAttributes(that); 3603 PrintAttributes(that);
3742 } 3604 }
3743 3605
3744
3745 void DotPrinter::VisitAssertion(AssertionNode* that) { 3606 void DotPrinter::VisitAssertion(AssertionNode* that) {
3746 OS::Print(" n%p [", that); 3607 OS::Print(" n%p [", that);
3747 switch (that->assertion_type()) { 3608 switch (that->assertion_type()) {
3748 case AssertionNode::AT_END: 3609 case AssertionNode::AT_END:
3749 OS::Print("label=\"$\", shape=septagon"); 3610 OS::Print("label=\"$\", shape=septagon");
3750 break; 3611 break;
3751 case AssertionNode::AT_START: 3612 case AssertionNode::AT_START:
3752 OS::Print("label=\"^\", shape=septagon"); 3613 OS::Print("label=\"^\", shape=septagon");
3753 break; 3614 break;
3754 case AssertionNode::AT_BOUNDARY: 3615 case AssertionNode::AT_BOUNDARY:
3755 OS::Print("label=\"\\b\", shape=septagon"); 3616 OS::Print("label=\"\\b\", shape=septagon");
3756 break; 3617 break;
3757 case AssertionNode::AT_NON_BOUNDARY: 3618 case AssertionNode::AT_NON_BOUNDARY:
3758 OS::Print("label=\"\\B\", shape=septagon"); 3619 OS::Print("label=\"\\B\", shape=septagon");
3759 break; 3620 break;
3760 case AssertionNode::AFTER_NEWLINE: 3621 case AssertionNode::AFTER_NEWLINE:
3761 OS::Print("label=\"(?<=\\n)\", shape=septagon"); 3622 OS::Print("label=\"(?<=\\n)\", shape=septagon");
3762 break; 3623 break;
3763 } 3624 }
3764 OS::Print("];\n"); 3625 OS::Print("];\n");
3765 PrintAttributes(that); 3626 PrintAttributes(that);
3766 RegExpNode* successor = that->on_success(); 3627 RegExpNode* successor = that->on_success();
3767 OS::Print(" n%p -> n%p;\n", that, successor); 3628 OS::Print(" n%p -> n%p;\n", that, successor);
3768 Visit(successor); 3629 Visit(successor);
3769 } 3630 }
3770 3631
3771
3772 void DotPrinter::VisitAction(ActionNode* that) { 3632 void DotPrinter::VisitAction(ActionNode* that) {
3773 OS::Print(" n%p [", that); 3633 OS::Print(" n%p [", that);
3774 switch (that->action_type_) { 3634 switch (that->action_type_) {
3775 case ActionNode::SET_REGISTER: 3635 case ActionNode::SET_REGISTER:
3776 OS::Print("label=\"$%" Pd ":=%" Pd "\", shape=octagon", 3636 OS::Print("label=\"$%" Pd ":=%" Pd "\", shape=octagon",
3777 that->data_.u_store_register.reg, 3637 that->data_.u_store_register.reg,
3778 that->data_.u_store_register.value); 3638 that->data_.u_store_register.value);
3779 break; 3639 break;
3780 case ActionNode::INCREMENT_REGISTER: 3640 case ActionNode::INCREMENT_REGISTER:
3781 OS::Print("label=\"$%" Pd "++\", shape=octagon", 3641 OS::Print("label=\"$%" Pd "++\", shape=octagon",
(...skipping 23 matching lines...) Expand all
3805 break; 3665 break;
3806 } 3666 }
3807 } 3667 }
3808 OS::Print("];\n"); 3668 OS::Print("];\n");
3809 PrintAttributes(that); 3669 PrintAttributes(that);
3810 RegExpNode* successor = that->on_success(); 3670 RegExpNode* successor = that->on_success();
3811 OS::Print(" n%p -> n%p;\n", that, successor); 3671 OS::Print(" n%p -> n%p;\n", that, successor);
3812 Visit(successor); 3672 Visit(successor);
3813 } 3673 }
3814 3674
3815
3816 void RegExpEngine::DotPrint(const char* label, 3675 void RegExpEngine::DotPrint(const char* label,
3817 RegExpNode* node, 3676 RegExpNode* node,
3818 bool ignore_case) { 3677 bool ignore_case) {
3819 DotPrinter printer(ignore_case); 3678 DotPrinter printer(ignore_case);
3820 printer.PrintNode(label, node); 3679 printer.PrintNode(label, node);
3821 } 3680 }
3822 3681
3823
3824 #endif // DEBUG 3682 #endif // DEBUG
3825 3683
3826
3827 // ------------------------------------------------------------------- 3684 // -------------------------------------------------------------------
3828 // Tree to graph conversion 3685 // Tree to graph conversion
3829 3686
3830 // The zone in which we allocate graph nodes. 3687 // The zone in which we allocate graph nodes.
3831 #define OZ (on_success->zone()) 3688 #define OZ (on_success->zone())
3832 3689
3833 RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler, 3690 RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler,
3834 RegExpNode* on_success) { 3691 RegExpNode* on_success) {
3835 ZoneGrowableArray<TextElement>* elms = 3692 ZoneGrowableArray<TextElement>* elms =
3836 new (OZ) ZoneGrowableArray<TextElement>(1); 3693 new (OZ) ZoneGrowableArray<TextElement>(1);
3837 elms->Add(TextElement::Atom(this)); 3694 elms->Add(TextElement::Atom(this));
3838 return new (OZ) TextNode(elms, on_success); 3695 return new (OZ) TextNode(elms, on_success);
3839 } 3696 }
3840 3697
3841
3842 RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler, 3698 RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler,
3843 RegExpNode* on_success) { 3699 RegExpNode* on_success) {
3844 ZoneGrowableArray<TextElement>* elms = 3700 ZoneGrowableArray<TextElement>* elms =
3845 new (OZ) ZoneGrowableArray<TextElement>(1); 3701 new (OZ) ZoneGrowableArray<TextElement>(1);
3846 for (intptr_t i = 0; i < elements()->length(); i++) { 3702 for (intptr_t i = 0; i < elements()->length(); i++) {
3847 elms->Add(elements()->At(i)); 3703 elms->Add(elements()->At(i));
3848 } 3704 }
3849 return new (OZ) TextNode(elms, on_success); 3705 return new (OZ) TextNode(elms, on_success);
3850 } 3706 }
3851 3707
3852
3853 static bool CompareInverseRanges(ZoneGrowableArray<CharacterRange>* ranges, 3708 static bool CompareInverseRanges(ZoneGrowableArray<CharacterRange>* ranges,
3854 const intptr_t* special_class, 3709 const intptr_t* special_class,
3855 intptr_t length) { 3710 intptr_t length) {
3856 length--; // Remove final 0x10000. 3711 length--; // Remove final 0x10000.
3857 ASSERT(special_class[length] == 0x10000); 3712 ASSERT(special_class[length] == 0x10000);
3858 ASSERT(ranges->length() != 0); 3713 ASSERT(ranges->length() != 0);
3859 ASSERT(length != 0); 3714 ASSERT(length != 0);
3860 ASSERT(special_class[0] != 0); 3715 ASSERT(special_class[0] != 0);
3861 if (ranges->length() != (length >> 1) + 1) { 3716 if (ranges->length() != (length >> 1) + 1) {
3862 return false; 3717 return false;
(...skipping 10 matching lines...) Expand all
3873 if (special_class[i + 1] != range.from()) { 3728 if (special_class[i + 1] != range.from()) {
3874 return false; 3729 return false;
3875 } 3730 }
3876 } 3731 }
3877 if (range.to() != 0xffff) { 3732 if (range.to() != 0xffff) {
3878 return false; 3733 return false;
3879 } 3734 }
3880 return true; 3735 return true;
3881 } 3736 }
3882 3737
3883
3884 static bool CompareRanges(ZoneGrowableArray<CharacterRange>* ranges, 3738 static bool CompareRanges(ZoneGrowableArray<CharacterRange>* ranges,
3885 const intptr_t* special_class, 3739 const intptr_t* special_class,
3886 intptr_t length) { 3740 intptr_t length) {
3887 length--; // Remove final 0x10000. 3741 length--; // Remove final 0x10000.
3888 ASSERT(special_class[length] == 0x10000); 3742 ASSERT(special_class[length] == 0x10000);
3889 if (ranges->length() * 2 != length) { 3743 if (ranges->length() * 2 != length) {
3890 return false; 3744 return false;
3891 } 3745 }
3892 for (intptr_t i = 0; i < length; i += 2) { 3746 for (intptr_t i = 0; i < length; i += 2) {
3893 CharacterRange range = ranges->At(i >> 1); 3747 CharacterRange range = ranges->At(i >> 1);
3894 if (range.from() != special_class[i] || 3748 if (range.from() != special_class[i] ||
3895 range.to() != special_class[i + 1] - 1) { 3749 range.to() != special_class[i + 1] - 1) {
3896 return false; 3750 return false;
3897 } 3751 }
3898 } 3752 }
3899 return true; 3753 return true;
3900 } 3754 }
3901 3755
3902
3903 bool RegExpCharacterClass::is_standard() { 3756 bool RegExpCharacterClass::is_standard() {
3904 // TODO(lrn): Remove need for this function, by not throwing away information 3757 // TODO(lrn): Remove need for this function, by not throwing away information
3905 // along the way. 3758 // along the way.
3906 if (is_negated_) { 3759 if (is_negated_) {
3907 return false; 3760 return false;
3908 } 3761 }
3909 if (set_.is_standard()) { 3762 if (set_.is_standard()) {
3910 return true; 3763 return true;
3911 } 3764 }
3912 if (CompareRanges(set_.ranges(), kSpaceRanges, kSpaceRangeCount)) { 3765 if (CompareRanges(set_.ranges(), kSpaceRanges, kSpaceRangeCount)) {
(...skipping 18 matching lines...) Expand all
3931 set_.set_standard_set_type('w'); 3784 set_.set_standard_set_type('w');
3932 return true; 3785 return true;
3933 } 3786 }
3934 if (CompareInverseRanges(set_.ranges(), kWordRanges, kWordRangeCount)) { 3787 if (CompareInverseRanges(set_.ranges(), kWordRanges, kWordRangeCount)) {
3935 set_.set_standard_set_type('W'); 3788 set_.set_standard_set_type('W');
3936 return true; 3789 return true;
3937 } 3790 }
3938 return false; 3791 return false;
3939 } 3792 }
3940 3793
3941
3942 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, 3794 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
3943 RegExpNode* on_success) { 3795 RegExpNode* on_success) {
3944 return new (OZ) TextNode(this, on_success); 3796 return new (OZ) TextNode(this, on_success);
3945 } 3797 }
3946 3798
3947
3948 RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler, 3799 RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler,
3949 RegExpNode* on_success) { 3800 RegExpNode* on_success) {
3950 ZoneGrowableArray<RegExpTree*>* alternatives = this->alternatives(); 3801 ZoneGrowableArray<RegExpTree*>* alternatives = this->alternatives();
3951 intptr_t length = alternatives->length(); 3802 intptr_t length = alternatives->length();
3952 ChoiceNode* result = new (OZ) ChoiceNode(length, OZ); 3803 ChoiceNode* result = new (OZ) ChoiceNode(length, OZ);
3953 for (intptr_t i = 0; i < length; i++) { 3804 for (intptr_t i = 0; i < length; i++) {
3954 GuardedAlternative alternative( 3805 GuardedAlternative alternative(
3955 alternatives->At(i)->ToNode(compiler, on_success)); 3806 alternatives->At(i)->ToNode(compiler, on_success));
3956 result->AddAlternative(alternative); 3807 result->AddAlternative(alternative);
3957 } 3808 }
3958 return result; 3809 return result;
3959 } 3810 }
3960 3811
3961
3962 RegExpNode* RegExpQuantifier::ToNode(RegExpCompiler* compiler, 3812 RegExpNode* RegExpQuantifier::ToNode(RegExpCompiler* compiler,
3963 RegExpNode* on_success) { 3813 RegExpNode* on_success) {
3964 return ToNode(min(), max(), is_greedy(), body(), compiler, on_success); 3814 return ToNode(min(), max(), is_greedy(), body(), compiler, on_success);
3965 } 3815 }
3966 3816
3967
3968 // Scoped object to keep track of how much we unroll quantifier loops in the 3817 // Scoped object to keep track of how much we unroll quantifier loops in the
3969 // regexp graph generator. 3818 // regexp graph generator.
3970 class RegExpExpansionLimiter : public ValueObject { 3819 class RegExpExpansionLimiter : public ValueObject {
3971 public: 3820 public:
3972 static const intptr_t kMaxExpansionFactor = 6; 3821 static const intptr_t kMaxExpansionFactor = 6;
3973 RegExpExpansionLimiter(RegExpCompiler* compiler, intptr_t factor) 3822 RegExpExpansionLimiter(RegExpCompiler* compiler, intptr_t factor)
3974 : compiler_(compiler), 3823 : compiler_(compiler),
3975 saved_expansion_factor_(compiler->current_expansion_factor()), 3824 saved_expansion_factor_(compiler->current_expansion_factor()),
3976 ok_to_expand_(saved_expansion_factor_ <= kMaxExpansionFactor) { 3825 ok_to_expand_(saved_expansion_factor_ <= kMaxExpansionFactor) {
3977 ASSERT(factor > 0); 3826 ASSERT(factor > 0);
(...skipping 17 matching lines...) Expand all
3995 bool ok_to_expand() { return ok_to_expand_; } 3844 bool ok_to_expand() { return ok_to_expand_; }
3996 3845
3997 private: 3846 private:
3998 RegExpCompiler* compiler_; 3847 RegExpCompiler* compiler_;
3999 intptr_t saved_expansion_factor_; 3848 intptr_t saved_expansion_factor_;
4000 bool ok_to_expand_; 3849 bool ok_to_expand_;
4001 3850
4002 DISALLOW_IMPLICIT_CONSTRUCTORS(RegExpExpansionLimiter); 3851 DISALLOW_IMPLICIT_CONSTRUCTORS(RegExpExpansionLimiter);
4003 }; 3852 };
4004 3853
4005
4006 RegExpNode* RegExpQuantifier::ToNode(intptr_t min, 3854 RegExpNode* RegExpQuantifier::ToNode(intptr_t min,
4007 intptr_t max, 3855 intptr_t max,
4008 bool is_greedy, 3856 bool is_greedy,
4009 RegExpTree* body, 3857 RegExpTree* body,
4010 RegExpCompiler* compiler, 3858 RegExpCompiler* compiler,
4011 RegExpNode* on_success, 3859 RegExpNode* on_success,
4012 bool not_at_start) { 3860 bool not_at_start) {
4013 // x{f, t} becomes this: 3861 // x{f, t} becomes this:
4014 // 3862 //
4015 // (r++)<-. 3863 // (r++)<-.
(...skipping 116 matching lines...) Expand 10 before | Expand all | Expand 10 after
4132 center->AddContinueAlternative(rest_alt); 3980 center->AddContinueAlternative(rest_alt);
4133 center->AddLoopAlternative(body_alt); 3981 center->AddLoopAlternative(body_alt);
4134 } 3982 }
4135 if (needs_counter) { 3983 if (needs_counter) {
4136 return ActionNode::SetRegister(reg_ctr, 0, center); 3984 return ActionNode::SetRegister(reg_ctr, 0, center);
4137 } else { 3985 } else {
4138 return center; 3986 return center;
4139 } 3987 }
4140 } 3988 }
4141 3989
4142
4143 RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler, 3990 RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler,
4144 RegExpNode* on_success) { 3991 RegExpNode* on_success) {
4145 switch (assertion_type()) { 3992 switch (assertion_type()) {
4146 case START_OF_LINE: 3993 case START_OF_LINE:
4147 return AssertionNode::AfterNewline(on_success); 3994 return AssertionNode::AfterNewline(on_success);
4148 case START_OF_INPUT: 3995 case START_OF_INPUT:
4149 return AssertionNode::AtStart(on_success); 3996 return AssertionNode::AtStart(on_success);
4150 case BOUNDARY: 3997 case BOUNDARY:
4151 return AssertionNode::AtBoundary(on_success); 3998 return AssertionNode::AtBoundary(on_success);
4152 case NON_BOUNDARY: 3999 case NON_BOUNDARY:
(...skipping 28 matching lines...) Expand all
4181 GuardedAlternative end_alternative(AssertionNode::AtEnd(on_success)); 4028 GuardedAlternative end_alternative(AssertionNode::AtEnd(on_success));
4182 result->AddAlternative(end_alternative); 4029 result->AddAlternative(end_alternative);
4183 return result; 4030 return result;
4184 } 4031 }
4185 default: 4032 default:
4186 UNREACHABLE(); 4033 UNREACHABLE();
4187 } 4034 }
4188 return on_success; 4035 return on_success;
4189 } 4036 }
4190 4037
4191
4192 RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler, 4038 RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler,
4193 RegExpNode* on_success) { 4039 RegExpNode* on_success) {
4194 return new (OZ) 4040 return new (OZ)
4195 BackReferenceNode(RegExpCapture::StartRegister(index()), 4041 BackReferenceNode(RegExpCapture::StartRegister(index()),
4196 RegExpCapture::EndRegister(index()), on_success); 4042 RegExpCapture::EndRegister(index()), on_success);
4197 } 4043 }
4198 4044
4199
4200 RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler, 4045 RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler,
4201 RegExpNode* on_success) { 4046 RegExpNode* on_success) {
4202 return on_success; 4047 return on_success;
4203 } 4048 }
4204 4049
4205
4206 RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler, 4050 RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler,
4207 RegExpNode* on_success) { 4051 RegExpNode* on_success) {
4208 intptr_t stack_pointer_register = compiler->AllocateRegister(); 4052 intptr_t stack_pointer_register = compiler->AllocateRegister();
4209 intptr_t position_register = compiler->AllocateRegister(); 4053 intptr_t position_register = compiler->AllocateRegister();
4210 4054
4211 const intptr_t registers_per_capture = 2; 4055 const intptr_t registers_per_capture = 2;
4212 const intptr_t register_of_first_capture = 2; 4056 const intptr_t register_of_first_capture = 2;
4213 intptr_t register_count = capture_count_ * registers_per_capture; 4057 intptr_t register_count = capture_count_ * registers_per_capture;
4214 intptr_t register_start = 4058 intptr_t register_start =
4215 register_of_first_capture + capture_from_ * registers_per_capture; 4059 register_of_first_capture + capture_from_ * registers_per_capture;
(...skipping 23 matching lines...) Expand all
4239 body()->ToNode(compiler, success = new (OZ) NegativeSubmatchSuccess( 4083 body()->ToNode(compiler, success = new (OZ) NegativeSubmatchSuccess(
4240 stack_pointer_register, position_register, 4084 stack_pointer_register, position_register,
4241 register_count, register_start, OZ))); 4085 register_count, register_start, OZ)));
4242 ChoiceNode* choice_node = new (OZ) NegativeLookaheadChoiceNode( 4086 ChoiceNode* choice_node = new (OZ) NegativeLookaheadChoiceNode(
4243 body_alt, GuardedAlternative(on_success), OZ); 4087 body_alt, GuardedAlternative(on_success), OZ);
4244 return ActionNode::BeginSubmatch(stack_pointer_register, position_register, 4088 return ActionNode::BeginSubmatch(stack_pointer_register, position_register,
4245 choice_node); 4089 choice_node);
4246 } 4090 }
4247 } 4091 }
4248 4092
4249
4250 RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler, 4093 RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler,
4251 RegExpNode* on_success) { 4094 RegExpNode* on_success) {
4252 return ToNode(body(), index(), compiler, on_success); 4095 return ToNode(body(), index(), compiler, on_success);
4253 } 4096 }
4254 4097
4255
4256 RegExpNode* RegExpCapture::ToNode(RegExpTree* body, 4098 RegExpNode* RegExpCapture::ToNode(RegExpTree* body,
4257 intptr_t index, 4099 intptr_t index,
4258 RegExpCompiler* compiler, 4100 RegExpCompiler* compiler,
4259 RegExpNode* on_success) { 4101 RegExpNode* on_success) {
4260 intptr_t start_reg = RegExpCapture::StartRegister(index); 4102 intptr_t start_reg = RegExpCapture::StartRegister(index);
4261 intptr_t end_reg = RegExpCapture::EndRegister(index); 4103 intptr_t end_reg = RegExpCapture::EndRegister(index);
4262 RegExpNode* store_end = ActionNode::StorePosition(end_reg, true, on_success); 4104 RegExpNode* store_end = ActionNode::StorePosition(end_reg, true, on_success);
4263 RegExpNode* body_node = body->ToNode(compiler, store_end); 4105 RegExpNode* body_node = body->ToNode(compiler, store_end);
4264 return ActionNode::StorePosition(start_reg, true, body_node); 4106 return ActionNode::StorePosition(start_reg, true, body_node);
4265 } 4107 }
4266 4108
4267
4268 RegExpNode* RegExpAlternative::ToNode(RegExpCompiler* compiler, 4109 RegExpNode* RegExpAlternative::ToNode(RegExpCompiler* compiler,
4269 RegExpNode* on_success) { 4110 RegExpNode* on_success) {
4270 ZoneGrowableArray<RegExpTree*>* children = nodes(); 4111 ZoneGrowableArray<RegExpTree*>* children = nodes();
4271 RegExpNode* current = on_success; 4112 RegExpNode* current = on_success;
4272 for (intptr_t i = children->length() - 1; i >= 0; i--) { 4113 for (intptr_t i = children->length() - 1; i >= 0; i--) {
4273 current = children->At(i)->ToNode(compiler, current); 4114 current = children->At(i)->ToNode(compiler, current);
4274 } 4115 }
4275 return current; 4116 return current;
4276 } 4117 }
4277 4118
4278
4279 static void AddClass(const intptr_t* elmv, 4119 static void AddClass(const intptr_t* elmv,
4280 intptr_t elmc, 4120 intptr_t elmc,
4281 ZoneGrowableArray<CharacterRange>* ranges) { 4121 ZoneGrowableArray<CharacterRange>* ranges) {
4282 elmc--; 4122 elmc--;
4283 ASSERT(elmv[elmc] == 0x10000); 4123 ASSERT(elmv[elmc] == 0x10000);
4284 for (intptr_t i = 0; i < elmc; i += 2) { 4124 for (intptr_t i = 0; i < elmc; i += 2) {
4285 ASSERT(elmv[i] < elmv[i + 1]); 4125 ASSERT(elmv[i] < elmv[i + 1]);
4286 ranges->Add(CharacterRange(elmv[i], elmv[i + 1] - 1)); 4126 ranges->Add(CharacterRange(elmv[i], elmv[i + 1] - 1));
4287 } 4127 }
4288 } 4128 }
4289 4129
4290
4291 static void AddClassNegated(const intptr_t* elmv, 4130 static void AddClassNegated(const intptr_t* elmv,
4292 intptr_t elmc, 4131 intptr_t elmc,
4293 ZoneGrowableArray<CharacterRange>* ranges) { 4132 ZoneGrowableArray<CharacterRange>* ranges) {
4294 elmc--; 4133 elmc--;
4295 ASSERT(elmv[elmc] == 0x10000); 4134 ASSERT(elmv[elmc] == 0x10000);
4296 ASSERT(elmv[0] != 0x0000); 4135 ASSERT(elmv[0] != 0x0000);
4297 ASSERT(elmv[elmc - 1] != Utf16::kMaxCodeUnit); 4136 ASSERT(elmv[elmc - 1] != Utf16::kMaxCodeUnit);
4298 uint16_t last = 0x0000; 4137 uint16_t last = 0x0000;
4299 for (intptr_t i = 0; i < elmc; i += 2) { 4138 for (intptr_t i = 0; i < elmc; i += 2) {
4300 ASSERT(last <= elmv[i] - 1); 4139 ASSERT(last <= elmv[i] - 1);
4301 ASSERT(elmv[i] < elmv[i + 1]); 4140 ASSERT(elmv[i] < elmv[i + 1]);
4302 ranges->Add(CharacterRange(last, elmv[i] - 1)); 4141 ranges->Add(CharacterRange(last, elmv[i] - 1));
4303 last = elmv[i + 1]; 4142 last = elmv[i + 1];
4304 } 4143 }
4305 ranges->Add(CharacterRange(last, Utf16::kMaxCodeUnit)); 4144 ranges->Add(CharacterRange(last, Utf16::kMaxCodeUnit));
4306 } 4145 }
4307 4146
4308
4309 void CharacterRange::AddClassEscape(uint16_t type, 4147 void CharacterRange::AddClassEscape(uint16_t type,
4310 ZoneGrowableArray<CharacterRange>* ranges) { 4148 ZoneGrowableArray<CharacterRange>* ranges) {
4311 switch (type) { 4149 switch (type) {
4312 case 's': 4150 case 's':
4313 AddClass(kSpaceRanges, kSpaceRangeCount, ranges); 4151 AddClass(kSpaceRanges, kSpaceRangeCount, ranges);
4314 break; 4152 break;
4315 case 'S': 4153 case 'S':
4316 AddClassNegated(kSpaceRanges, kSpaceRangeCount, ranges); 4154 AddClassNegated(kSpaceRanges, kSpaceRangeCount, ranges);
4317 break; 4155 break;
4318 case 'w': 4156 case 'w':
(...skipping 20 matching lines...) Expand all
4339 // This is the set of characters matched by the $ and ^ symbols 4177 // This is the set of characters matched by the $ and ^ symbols
4340 // in multiline mode. 4178 // in multiline mode.
4341 case 'n': 4179 case 'n':
4342 AddClass(kLineTerminatorRanges, kLineTerminatorRangeCount, ranges); 4180 AddClass(kLineTerminatorRanges, kLineTerminatorRangeCount, ranges);
4343 break; 4181 break;
4344 default: 4182 default:
4345 UNREACHABLE(); 4183 UNREACHABLE();
4346 } 4184 }
4347 } 4185 }
4348 4186
4349
4350 void CharacterRange::AddCaseEquivalents( 4187 void CharacterRange::AddCaseEquivalents(
4351 ZoneGrowableArray<CharacterRange>* ranges, 4188 ZoneGrowableArray<CharacterRange>* ranges,
4352 bool is_one_byte, 4189 bool is_one_byte,
4353 Zone* zone) { 4190 Zone* zone) {
4354 uint16_t bottom = from(); 4191 uint16_t bottom = from();
4355 uint16_t top = to(); 4192 uint16_t top = to();
4356 if (is_one_byte && !RangeContainsLatin1Equivalents(*this)) { 4193 if (is_one_byte && !RangeContainsLatin1Equivalents(*this)) {
4357 if (bottom > Symbols::kMaxOneCharCodeSymbol) return; 4194 if (bottom > Symbols::kMaxOneCharCodeSymbol) return;
4358 if (top > Symbols::kMaxOneCharCodeSymbol) { 4195 if (top > Symbols::kMaxOneCharCodeSymbol) {
4359 top = Symbols::kMaxOneCharCodeSymbol; 4196 top = Symbols::kMaxOneCharCodeSymbol;
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after
4411 uint16_t range_to = c - (block_end - end); 4248 uint16_t range_to = c - (block_end - end);
4412 if (!(bottom <= range_from && range_to <= top)) { 4249 if (!(bottom <= range_from && range_to <= top)) {
4413 ranges->Add(CharacterRange(range_from, range_to)); 4250 ranges->Add(CharacterRange(range_from, range_to));
4414 } 4251 }
4415 } 4252 }
4416 pos = end + 1; 4253 pos = end + 1;
4417 } 4254 }
4418 } 4255 }
4419 } 4256 }
4420 4257
4421
4422 bool CharacterRange::IsCanonical(ZoneGrowableArray<CharacterRange>* ranges) { 4258 bool CharacterRange::IsCanonical(ZoneGrowableArray<CharacterRange>* ranges) {
4423 ASSERT(ranges != NULL); 4259 ASSERT(ranges != NULL);
4424 intptr_t n = ranges->length(); 4260 intptr_t n = ranges->length();
4425 if (n <= 1) return true; 4261 if (n <= 1) return true;
4426 intptr_t max = ranges->At(0).to(); 4262 intptr_t max = ranges->At(0).to();
4427 for (intptr_t i = 1; i < n; i++) { 4263 for (intptr_t i = 1; i < n; i++) {
4428 CharacterRange next_range = ranges->At(i); 4264 CharacterRange next_range = ranges->At(i);
4429 if (next_range.from() <= max + 1) return false; 4265 if (next_range.from() <= max + 1) return false;
4430 max = next_range.to(); 4266 max = next_range.to();
4431 } 4267 }
4432 return true; 4268 return true;
4433 } 4269 }
4434 4270
4435
4436 ZoneGrowableArray<CharacterRange>* CharacterSet::ranges() { 4271 ZoneGrowableArray<CharacterRange>* CharacterSet::ranges() {
4437 if (ranges_ == NULL) { 4272 if (ranges_ == NULL) {
4438 ranges_ = new ZoneGrowableArray<CharacterRange>(2); 4273 ranges_ = new ZoneGrowableArray<CharacterRange>(2);
4439 CharacterRange::AddClassEscape(standard_set_type_, ranges_); 4274 CharacterRange::AddClassEscape(standard_set_type_, ranges_);
4440 } 4275 }
4441 return ranges_; 4276 return ranges_;
4442 } 4277 }
4443 4278
4444
4445 // Move a number of elements in a zone array to another position 4279 // Move a number of elements in a zone array to another position
4446 // in the same array. Handles overlapping source and target areas. 4280 // in the same array. Handles overlapping source and target areas.
4447 static void MoveRanges(ZoneGrowableArray<CharacterRange>* list, 4281 static void MoveRanges(ZoneGrowableArray<CharacterRange>* list,
4448 intptr_t from, 4282 intptr_t from,
4449 intptr_t to, 4283 intptr_t to,
4450 intptr_t count) { 4284 intptr_t count) {
4451 // Ranges are potentially overlapping. 4285 // Ranges are potentially overlapping.
4452 if (from < to) { 4286 if (from < to) {
4453 for (intptr_t i = count - 1; i >= 0; i--) { 4287 for (intptr_t i = count - 1; i >= 0; i--) {
4454 (*list)[to + i] = list->At(from + i); 4288 (*list)[to + i] = list->At(from + i);
4455 } 4289 }
4456 } else { 4290 } else {
4457 for (intptr_t i = 0; i < count; i++) { 4291 for (intptr_t i = 0; i < count; i++) {
4458 (*list)[to + i] = list->At(from + i); 4292 (*list)[to + i] = list->At(from + i);
4459 } 4293 }
4460 } 4294 }
4461 } 4295 }
4462 4296
4463
4464 static intptr_t InsertRangeInCanonicalList( 4297 static intptr_t InsertRangeInCanonicalList(
4465 ZoneGrowableArray<CharacterRange>* list, 4298 ZoneGrowableArray<CharacterRange>* list,
4466 intptr_t count, 4299 intptr_t count,
4467 CharacterRange insert) { 4300 CharacterRange insert) {
4468 // Inserts a range into list[0..count[, which must be sorted 4301 // Inserts a range into list[0..count[, which must be sorted
4469 // by from value and non-overlapping and non-adjacent, using at most 4302 // by from value and non-overlapping and non-adjacent, using at most
4470 // list[0..count] for the result. Returns the number of resulting 4303 // list[0..count] for the result. Returns the number of resulting
4471 // canonicalized ranges. Inserting a range may collapse existing ranges into 4304 // canonicalized ranges. Inserting a range may collapse existing ranges into
4472 // fewer ranges, so the return value can be anything in the range 1..count+1. 4305 // fewer ranges, so the return value can be anything in the range 1..count+1.
4473 uint16_t from = insert.from(); 4306 uint16_t from = insert.from();
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after
4512 4345
4513 intptr_t new_from = Utils::Minimum(list->At(start_pos).from(), from); 4346 intptr_t new_from = Utils::Minimum(list->At(start_pos).from(), from);
4514 intptr_t new_to = Utils::Maximum(list->At(end_pos - 1).to(), to); 4347 intptr_t new_to = Utils::Maximum(list->At(end_pos - 1).to(), to);
4515 if (end_pos < count) { 4348 if (end_pos < count) {
4516 MoveRanges(list, end_pos, start_pos + 1, count - end_pos); 4349 MoveRanges(list, end_pos, start_pos + 1, count - end_pos);
4517 } 4350 }
4518 (*list)[start_pos] = CharacterRange(new_from, new_to); 4351 (*list)[start_pos] = CharacterRange(new_from, new_to);
4519 return count - (end_pos - start_pos) + 1; 4352 return count - (end_pos - start_pos) + 1;
4520 } 4353 }
4521 4354
4522
4523 void CharacterSet::Canonicalize() { 4355 void CharacterSet::Canonicalize() {
4524 // Special/default classes are always considered canonical. The result 4356 // Special/default classes are always considered canonical. The result
4525 // of calling ranges() will be sorted. 4357 // of calling ranges() will be sorted.
4526 if (ranges_ == NULL) return; 4358 if (ranges_ == NULL) return;
4527 CharacterRange::Canonicalize(ranges_); 4359 CharacterRange::Canonicalize(ranges_);
4528 } 4360 }
4529 4361
4530
4531 void CharacterRange::Canonicalize( 4362 void CharacterRange::Canonicalize(
4532 ZoneGrowableArray<CharacterRange>* character_ranges) { 4363 ZoneGrowableArray<CharacterRange>* character_ranges) {
4533 if (character_ranges->length() <= 1) return; 4364 if (character_ranges->length() <= 1) return;
4534 // Check whether ranges are already canonical (increasing, non-overlapping, 4365 // Check whether ranges are already canonical (increasing, non-overlapping,
4535 // non-adjacent). 4366 // non-adjacent).
4536 intptr_t n = character_ranges->length(); 4367 intptr_t n = character_ranges->length();
4537 intptr_t max = character_ranges->At(0).to(); 4368 intptr_t max = character_ranges->At(0).to();
4538 intptr_t i = 1; 4369 intptr_t i = 1;
4539 while (i < n) { 4370 while (i < n) {
4540 CharacterRange current = character_ranges->At(i); 4371 CharacterRange current = character_ranges->At(i);
(...skipping 16 matching lines...) Expand all
4557 do { 4388 do {
4558 num_canonical = InsertRangeInCanonicalList(character_ranges, num_canonical, 4389 num_canonical = InsertRangeInCanonicalList(character_ranges, num_canonical,
4559 character_ranges->At(read)); 4390 character_ranges->At(read));
4560 read++; 4391 read++;
4561 } while (read < n); 4392 } while (read < n);
4562 character_ranges->TruncateTo(num_canonical); 4393 character_ranges->TruncateTo(num_canonical);
4563 4394
4564 ASSERT(CharacterRange::IsCanonical(character_ranges)); 4395 ASSERT(CharacterRange::IsCanonical(character_ranges));
4565 } 4396 }
4566 4397
4567
4568 void CharacterRange::Negate(ZoneGrowableArray<CharacterRange>* ranges, 4398 void CharacterRange::Negate(ZoneGrowableArray<CharacterRange>* ranges,
4569 ZoneGrowableArray<CharacterRange>* negated_ranges) { 4399 ZoneGrowableArray<CharacterRange>* negated_ranges) {
4570 ASSERT(CharacterRange::IsCanonical(ranges)); 4400 ASSERT(CharacterRange::IsCanonical(ranges));
4571 ASSERT(negated_ranges->length() == 0); 4401 ASSERT(negated_ranges->length() == 0);
4572 intptr_t range_count = ranges->length(); 4402 intptr_t range_count = ranges->length();
4573 uint16_t from = 0; 4403 uint16_t from = 0;
4574 intptr_t i = 0; 4404 intptr_t i = 0;
4575 if (range_count > 0 && ranges->At(0).from() == 0) { 4405 if (range_count > 0 && ranges->At(0).from() == 0) {
4576 from = ranges->At(0).to(); 4406 from = ranges->At(0).to();
4577 i = 1; 4407 i = 1;
4578 } 4408 }
4579 while (i < range_count) { 4409 while (i < range_count) {
4580 CharacterRange range = ranges->At(i); 4410 CharacterRange range = ranges->At(i);
4581 negated_ranges->Add(CharacterRange(from + 1, range.from() - 1)); 4411 negated_ranges->Add(CharacterRange(from + 1, range.from() - 1));
4582 from = range.to(); 4412 from = range.to();
4583 i++; 4413 i++;
4584 } 4414 }
4585 if (from < Utf16::kMaxCodeUnit) { 4415 if (from < Utf16::kMaxCodeUnit) {
4586 negated_ranges->Add(CharacterRange(from + 1, Utf16::kMaxCodeUnit)); 4416 negated_ranges->Add(CharacterRange(from + 1, Utf16::kMaxCodeUnit));
4587 } 4417 }
4588 } 4418 }
4589 4419
4590
4591 // ------------------------------------------------------------------- 4420 // -------------------------------------------------------------------
4592 // Splay tree 4421 // Splay tree
4593 4422
4594
4595 // Workaround for the fact that ZoneGrowableArray does not have contains(). 4423 // Workaround for the fact that ZoneGrowableArray does not have contains().
4596 static bool ArrayContains(ZoneGrowableArray<unsigned>* array, unsigned value) { 4424 static bool ArrayContains(ZoneGrowableArray<unsigned>* array, unsigned value) {
4597 for (intptr_t i = 0; i < array->length(); i++) { 4425 for (intptr_t i = 0; i < array->length(); i++) {
4598 if (array->At(i) == value) { 4426 if (array->At(i) == value) {
4599 return true; 4427 return true;
4600 } 4428 }
4601 } 4429 }
4602 return false; 4430 return false;
4603 } 4431 }
4604 4432
4605
4606 void OutSet::Set(unsigned value, Zone* zone) { 4433 void OutSet::Set(unsigned value, Zone* zone) {
4607 if (value < kFirstLimit) { 4434 if (value < kFirstLimit) {
4608 first_ |= (1 << value); 4435 first_ |= (1 << value);
4609 } else { 4436 } else {
4610 if (remaining_ == NULL) 4437 if (remaining_ == NULL)
4611 remaining_ = new (zone) ZoneGrowableArray<unsigned>(1); 4438 remaining_ = new (zone) ZoneGrowableArray<unsigned>(1);
4612 4439
4613 bool remaining_contains_value = ArrayContains(remaining_, value); 4440 bool remaining_contains_value = ArrayContains(remaining_, value);
4614 if (remaining_->is_empty() || !remaining_contains_value) { 4441 if (remaining_->is_empty() || !remaining_contains_value) {
4615 remaining_->Add(value); 4442 remaining_->Add(value);
4616 } 4443 }
4617 } 4444 }
4618 } 4445 }
4619 4446
4620
4621 bool OutSet::Get(unsigned value) const { 4447 bool OutSet::Get(unsigned value) const {
4622 if (value < kFirstLimit) { 4448 if (value < kFirstLimit) {
4623 return (first_ & (1 << value)) != 0; 4449 return (first_ & (1 << value)) != 0;
4624 } else if (remaining_ == NULL) { 4450 } else if (remaining_ == NULL) {
4625 return false; 4451 return false;
4626 } else { 4452 } else {
4627 return ArrayContains(remaining_, value); 4453 return ArrayContains(remaining_, value);
4628 } 4454 }
4629 } 4455 }
4630 4456
4631
4632 // ------------------------------------------------------------------- 4457 // -------------------------------------------------------------------
4633 // Analysis 4458 // Analysis
4634 4459
4635
4636 void Analysis::EnsureAnalyzed(RegExpNode* that) { 4460 void Analysis::EnsureAnalyzed(RegExpNode* that) {
4637 if (that->info()->been_analyzed || that->info()->being_analyzed) return; 4461 if (that->info()->been_analyzed || that->info()->being_analyzed) return;
4638 that->info()->being_analyzed = true; 4462 that->info()->being_analyzed = true;
4639 that->Accept(this); 4463 that->Accept(this);
4640 that->info()->being_analyzed = false; 4464 that->info()->being_analyzed = false;
4641 that->info()->been_analyzed = true; 4465 that->info()->been_analyzed = true;
4642 } 4466 }
4643 4467
4644
4645 void Analysis::VisitEnd(EndNode* that) { 4468 void Analysis::VisitEnd(EndNode* that) {
4646 // nothing to do 4469 // nothing to do
4647 } 4470 }
4648 4471
4649
4650 void TextNode::CalculateOffsets() { 4472 void TextNode::CalculateOffsets() {
4651 intptr_t element_count = elements()->length(); 4473 intptr_t element_count = elements()->length();
4652 // Set up the offsets of the elements relative to the start. This is a fixed 4474 // Set up the offsets of the elements relative to the start. This is a fixed
4653 // quantity since a TextNode can only contain fixed-width things. 4475 // quantity since a TextNode can only contain fixed-width things.
4654 intptr_t cp_offset = 0; 4476 intptr_t cp_offset = 0;
4655 for (intptr_t i = 0; i < element_count; i++) { 4477 for (intptr_t i = 0; i < element_count; i++) {
4656 TextElement& elm = (*elements())[i]; 4478 TextElement& elm = (*elements())[i];
4657 elm.set_cp_offset(cp_offset); 4479 elm.set_cp_offset(cp_offset);
4658 cp_offset += elm.length(); 4480 cp_offset += elm.length();
4659 } 4481 }
4660 } 4482 }
4661 4483
4662
4663 void Analysis::VisitText(TextNode* that) { 4484 void Analysis::VisitText(TextNode* that) {
4664 if (ignore_case_) { 4485 if (ignore_case_) {
4665 that->MakeCaseIndependent(is_one_byte_); 4486 that->MakeCaseIndependent(is_one_byte_);
4666 } 4487 }
4667 EnsureAnalyzed(that->on_success()); 4488 EnsureAnalyzed(that->on_success());
4668 if (!has_failed()) { 4489 if (!has_failed()) {
4669 that->CalculateOffsets(); 4490 that->CalculateOffsets();
4670 } 4491 }
4671 } 4492 }
4672 4493
4673
4674 void Analysis::VisitAction(ActionNode* that) { 4494 void Analysis::VisitAction(ActionNode* that) {
4675 RegExpNode* target = that->on_success(); 4495 RegExpNode* target = that->on_success();
4676 EnsureAnalyzed(target); 4496 EnsureAnalyzed(target);
4677 if (!has_failed()) { 4497 if (!has_failed()) {
4678 // If the next node is interested in what it follows then this node 4498 // If the next node is interested in what it follows then this node
4679 // has to be interested too so it can pass the information on. 4499 // has to be interested too so it can pass the information on.
4680 that->info()->AddFromFollowing(target->info()); 4500 that->info()->AddFromFollowing(target->info());
4681 } 4501 }
4682 } 4502 }
4683 4503
4684
4685 void Analysis::VisitChoice(ChoiceNode* that) { 4504 void Analysis::VisitChoice(ChoiceNode* that) {
4686 NodeInfo* info = that->info(); 4505 NodeInfo* info = that->info();
4687 for (intptr_t i = 0; i < that->alternatives()->length(); i++) { 4506 for (intptr_t i = 0; i < that->alternatives()->length(); i++) {
4688 RegExpNode* node = (*that->alternatives())[i].node(); 4507 RegExpNode* node = (*that->alternatives())[i].node();
4689 EnsureAnalyzed(node); 4508 EnsureAnalyzed(node);
4690 if (has_failed()) return; 4509 if (has_failed()) return;
4691 // Anything the following nodes need to know has to be known by 4510 // Anything the following nodes need to know has to be known by
4692 // this node also, so it can pass it on. 4511 // this node also, so it can pass it on.
4693 info->AddFromFollowing(node->info()); 4512 info->AddFromFollowing(node->info());
4694 } 4513 }
4695 } 4514 }
4696 4515
4697
4698 void Analysis::VisitLoopChoice(LoopChoiceNode* that) { 4516 void Analysis::VisitLoopChoice(LoopChoiceNode* that) {
4699 NodeInfo* info = that->info(); 4517 NodeInfo* info = that->info();
4700 for (intptr_t i = 0; i < that->alternatives()->length(); i++) { 4518 for (intptr_t i = 0; i < that->alternatives()->length(); i++) {
4701 RegExpNode* node = (*that->alternatives())[i].node(); 4519 RegExpNode* node = (*that->alternatives())[i].node();
4702 if (node != that->loop_node()) { 4520 if (node != that->loop_node()) {
4703 EnsureAnalyzed(node); 4521 EnsureAnalyzed(node);
4704 if (has_failed()) return; 4522 if (has_failed()) return;
4705 info->AddFromFollowing(node->info()); 4523 info->AddFromFollowing(node->info());
4706 } 4524 }
4707 } 4525 }
4708 // Check the loop last since it may need the value of this node 4526 // Check the loop last since it may need the value of this node
4709 // to get a correct result. 4527 // to get a correct result.
4710 EnsureAnalyzed(that->loop_node()); 4528 EnsureAnalyzed(that->loop_node());
4711 if (!has_failed()) { 4529 if (!has_failed()) {
4712 info->AddFromFollowing(that->loop_node()->info()); 4530 info->AddFromFollowing(that->loop_node()->info());
4713 } 4531 }
4714 } 4532 }
4715 4533
4716
4717 void Analysis::VisitBackReference(BackReferenceNode* that) { 4534 void Analysis::VisitBackReference(BackReferenceNode* that) {
4718 EnsureAnalyzed(that->on_success()); 4535 EnsureAnalyzed(that->on_success());
4719 } 4536 }
4720 4537
4721
4722 void Analysis::VisitAssertion(AssertionNode* that) { 4538 void Analysis::VisitAssertion(AssertionNode* that) {
4723 EnsureAnalyzed(that->on_success()); 4539 EnsureAnalyzed(that->on_success());
4724 } 4540 }
4725 4541
4726
4727 void BackReferenceNode::FillInBMInfo(intptr_t offset, 4542 void BackReferenceNode::FillInBMInfo(intptr_t offset,
4728 intptr_t budget, 4543 intptr_t budget,
4729 BoyerMooreLookahead* bm, 4544 BoyerMooreLookahead* bm,
4730 bool not_at_start) { 4545 bool not_at_start) {
4731 // Working out the set of characters that a backreference can match is too 4546 // Working out the set of characters that a backreference can match is too
4732 // hard, so we just say that any character can match. 4547 // hard, so we just say that any character can match.
4733 bm->SetRest(offset); 4548 bm->SetRest(offset);
4734 SaveBMInfo(bm, not_at_start, offset); 4549 SaveBMInfo(bm, not_at_start, offset);
4735 } 4550 }
4736 4551
4737
4738 COMPILE_ASSERT(BoyerMoorePositionInfo::kMapSize == 4552 COMPILE_ASSERT(BoyerMoorePositionInfo::kMapSize ==
4739 RegExpMacroAssembler::kTableSize); 4553 RegExpMacroAssembler::kTableSize);
4740 4554
4741
4742 void ChoiceNode::FillInBMInfo(intptr_t offset, 4555 void ChoiceNode::FillInBMInfo(intptr_t offset,
4743 intptr_t budget, 4556 intptr_t budget,
4744 BoyerMooreLookahead* bm, 4557 BoyerMooreLookahead* bm,
4745 bool not_at_start) { 4558 bool not_at_start) {
4746 ZoneGrowableArray<GuardedAlternative>* alts = alternatives(); 4559 ZoneGrowableArray<GuardedAlternative>* alts = alternatives();
4747 budget = (budget - 1) / alts->length(); 4560 budget = (budget - 1) / alts->length();
4748 for (intptr_t i = 0; i < alts->length(); i++) { 4561 for (intptr_t i = 0; i < alts->length(); i++) {
4749 GuardedAlternative& alt = (*alts)[i]; 4562 GuardedAlternative& alt = (*alts)[i];
4750 if (alt.guards() != NULL && alt.guards()->length() != 0) { 4563 if (alt.guards() != NULL && alt.guards()->length() != 0) {
4751 bm->SetRest(offset); // Give up trying to fill in info. 4564 bm->SetRest(offset); // Give up trying to fill in info.
4752 SaveBMInfo(bm, not_at_start, offset); 4565 SaveBMInfo(bm, not_at_start, offset);
4753 return; 4566 return;
4754 } 4567 }
4755 alt.node()->FillInBMInfo(offset, budget, bm, not_at_start); 4568 alt.node()->FillInBMInfo(offset, budget, bm, not_at_start);
4756 } 4569 }
4757 SaveBMInfo(bm, not_at_start, offset); 4570 SaveBMInfo(bm, not_at_start, offset);
4758 } 4571 }
4759 4572
4760
4761 void TextNode::FillInBMInfo(intptr_t initial_offset, 4573 void TextNode::FillInBMInfo(intptr_t initial_offset,
4762 intptr_t budget, 4574 intptr_t budget,
4763 BoyerMooreLookahead* bm, 4575 BoyerMooreLookahead* bm,
4764 bool not_at_start) { 4576 bool not_at_start) {
4765 if (initial_offset >= bm->length()) return; 4577 if (initial_offset >= bm->length()) return;
4766 intptr_t offset = initial_offset; 4578 intptr_t offset = initial_offset;
4767 intptr_t max_char = bm->max_char(); 4579 intptr_t max_char = bm->max_char();
4768 for (intptr_t i = 0; i < elements()->length(); i++) { 4580 for (intptr_t i = 0; i < elements()->length(); i++) {
4769 if (offset >= bm->length()) { 4581 if (offset >= bm->length()) {
4770 if (initial_offset == 0) set_bm_info(not_at_start, bm); 4582 if (initial_offset == 0) set_bm_info(not_at_start, bm);
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after
4811 } 4623 }
4812 if (offset >= bm->length()) { 4624 if (offset >= bm->length()) {
4813 if (initial_offset == 0) set_bm_info(not_at_start, bm); 4625 if (initial_offset == 0) set_bm_info(not_at_start, bm);
4814 return; 4626 return;
4815 } 4627 }
4816 on_success()->FillInBMInfo(offset, budget - 1, bm, 4628 on_success()->FillInBMInfo(offset, budget - 1, bm,
4817 true); // Not at start after a text node. 4629 true); // Not at start after a text node.
4818 if (initial_offset == 0) set_bm_info(not_at_start, bm); 4630 if (initial_offset == 0) set_bm_info(not_at_start, bm);
4819 } 4631 }
4820 4632
4821
4822 #if !defined(DART_PRECOMPILED_RUNTIME) 4633 #if !defined(DART_PRECOMPILED_RUNTIME)
4823 RegExpEngine::CompilationResult RegExpEngine::CompileIR( 4634 RegExpEngine::CompilationResult RegExpEngine::CompileIR(
4824 RegExpCompileData* data, 4635 RegExpCompileData* data,
4825 const ParsedFunction* parsed_function, 4636 const ParsedFunction* parsed_function,
4826 const ZoneGrowableArray<const ICData*>& ic_data_array, 4637 const ZoneGrowableArray<const ICData*>& ic_data_array,
4827 intptr_t osr_id) { 4638 intptr_t osr_id) {
4828 ASSERT(!FLAG_interpret_irregexp); 4639 ASSERT(!FLAG_interpret_irregexp);
4829 Zone* zone = Thread::Current()->zone(); 4640 Zone* zone = Thread::Current()->zone();
4830 4641
4831 const Function& function = parsed_function->function(); 4642 const Function& function = parsed_function->function();
(...skipping 91 matching lines...) Expand 10 before | Expand all | Expand 10 after
4923 compiler.Assemble(macro_assembler, node, data->capture_count, pattern); 4734 compiler.Assemble(macro_assembler, node, data->capture_count, pattern);
4924 4735
4925 if (FLAG_trace_irregexp) { 4736 if (FLAG_trace_irregexp) {
4926 macro_assembler->PrintBlocks(); 4737 macro_assembler->PrintBlocks();
4927 } 4738 }
4928 4739
4929 return result; 4740 return result;
4930 } 4741 }
4931 #endif // !defined(DART_PRECOMPILED_RUNTIME) 4742 #endif // !defined(DART_PRECOMPILED_RUNTIME)
4932 4743
4933
4934 RegExpEngine::CompilationResult RegExpEngine::CompileBytecode( 4744 RegExpEngine::CompilationResult RegExpEngine::CompileBytecode(
4935 RegExpCompileData* data, 4745 RegExpCompileData* data,
4936 const RegExp& regexp, 4746 const RegExp& regexp,
4937 bool is_one_byte, 4747 bool is_one_byte,
4938 bool is_sticky, 4748 bool is_sticky,
4939 Zone* zone) { 4749 Zone* zone) {
4940 ASSERT(FLAG_interpret_irregexp); 4750 ASSERT(FLAG_interpret_irregexp);
4941 const String& pattern = String::Handle(zone, regexp.pattern()); 4751 const String& pattern = String::Handle(zone, regexp.pattern());
4942 4752
4943 ASSERT(!regexp.IsNull()); 4753 ASSERT(!regexp.IsNull());
(...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after
5026 RegExpEngine::CompilationResult result = 4836 RegExpEngine::CompilationResult result =
5027 compiler.Assemble(macro_assembler, node, data->capture_count, pattern); 4837 compiler.Assemble(macro_assembler, node, data->capture_count, pattern);
5028 4838
5029 if (FLAG_trace_irregexp) { 4839 if (FLAG_trace_irregexp) {
5030 macro_assembler->PrintBlocks(); 4840 macro_assembler->PrintBlocks();
5031 } 4841 }
5032 4842
5033 return result; 4843 return result;
5034 } 4844 }
5035 4845
5036
5037 static void CreateSpecializedFunction(Thread* thread, 4846 static void CreateSpecializedFunction(Thread* thread,
5038 Zone* zone, 4847 Zone* zone,
5039 const RegExp& regexp, 4848 const RegExp& regexp,
5040 intptr_t specialization_cid, 4849 intptr_t specialization_cid,
5041 bool sticky, 4850 bool sticky,
5042 const Object& owner) { 4851 const Object& owner) {
5043 const intptr_t kParamCount = RegExpMacroAssembler::kParamCount; 4852 const intptr_t kParamCount = RegExpMacroAssembler::kParamCount;
5044 4853
5045 Function& fn = 4854 Function& fn =
5046 Function::Handle(zone, Function::New(Symbols::ColonMatcher(), 4855 Function::Handle(zone, Function::New(Symbols::ColonMatcher(),
(...skipping 27 matching lines...) Expand all
5074 4883
5075 // Cache the result. 4884 // Cache the result.
5076 regexp.set_function(specialization_cid, sticky, fn); 4885 regexp.set_function(specialization_cid, sticky, fn);
5077 4886
5078 fn.SetRegExpData(regexp, specialization_cid, sticky); 4887 fn.SetRegExpData(regexp, specialization_cid, sticky);
5079 fn.set_is_debuggable(false); 4888 fn.set_is_debuggable(false);
5080 4889
5081 // The function is compiled lazily during the first call. 4890 // The function is compiled lazily during the first call.
5082 } 4891 }
5083 4892
5084
5085 RawRegExp* RegExpEngine::CreateRegExp(Thread* thread, 4893 RawRegExp* RegExpEngine::CreateRegExp(Thread* thread,
5086 const String& pattern, 4894 const String& pattern,
5087 bool multi_line, 4895 bool multi_line,
5088 bool ignore_case) { 4896 bool ignore_case) {
5089 Zone* zone = thread->zone(); 4897 Zone* zone = thread->zone();
5090 const RegExp& regexp = RegExp::Handle(RegExp::New()); 4898 const RegExp& regexp = RegExp::Handle(RegExp::New());
5091 4899
5092 regexp.set_pattern(pattern); 4900 regexp.set_pattern(pattern);
5093 4901
5094 if (multi_line) { 4902 if (multi_line) {
(...skipping 18 matching lines...) Expand all
5113 CreateSpecializedFunction(thread, zone, regexp, cid, /*sticky=*/false, 4921 CreateSpecializedFunction(thread, zone, regexp, cid, /*sticky=*/false,
5114 owner); 4922 owner);
5115 CreateSpecializedFunction(thread, zone, regexp, cid, /*sticky=*/true, 4923 CreateSpecializedFunction(thread, zone, regexp, cid, /*sticky=*/true,
5116 owner); 4924 owner);
5117 } 4925 }
5118 } 4926 }
5119 4927
5120 return regexp.raw(); 4928 return regexp.raw();
5121 } 4929 }
5122 4930
5123
5124 } // namespace dart 4931 } // namespace dart
OLDNEW
« no previous file with comments | « runtime/vm/regexp.h ('k') | runtime/vm/regexp_assembler.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698