| OLD | NEW |
| 1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
| 4 | 4 |
| 5 #include "vm/regexp.h" | 5 #include "vm/regexp.h" |
| 6 | 6 |
| 7 #include "vm/dart_entry.h" | 7 #include "vm/dart_entry.h" |
| 8 #include "vm/regexp_assembler.h" | 8 #include "vm/regexp_assembler.h" |
| 9 #include "vm/regexp_assembler_bytecode.h" | 9 #include "vm/regexp_assembler_bytecode.h" |
| 10 #include "vm/regexp_assembler_ir.h" | 10 #include "vm/regexp_assembler_ir.h" |
| 11 #include "vm/regexp_ast.h" | 11 #include "vm/regexp_ast.h" |
| 12 #include "vm/symbols.h" |
| 13 #include "vm/thread.h" |
| 12 #include "vm/unibrow-inl.h" | 14 #include "vm/unibrow-inl.h" |
| 13 #include "vm/unicode.h" | 15 #include "vm/unicode.h" |
| 14 #include "vm/symbols.h" | |
| 15 #include "vm/thread.h" | |
| 16 | 16 |
| 17 #define Z (zone()) | 17 #define Z (zone()) |
| 18 | 18 |
| 19 namespace dart { | 19 namespace dart { |
| 20 | 20 |
| 21 // Default to generating optimized regexp code. | 21 // Default to generating optimized regexp code. |
| 22 static const bool kRegexpOptimization = true; | 22 static const bool kRegexpOptimization = true; |
| 23 | 23 |
| 24 // More makes code generation slower, less makes V8 benchmark score lower. | 24 // More makes code generation slower, less makes V8 benchmark score lower. |
| 25 static const intptr_t kMaxLookaheadForBoyerMoore = 8; | 25 static const intptr_t kMaxLookaheadForBoyerMoore = 8; |
| (...skipping 163 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 189 // string. Therefore code generated for a non-trivial trace is specialized | 189 // string. Therefore code generated for a non-trivial trace is specialized |
| 190 // to that trace. The code generator therefore has the ability to generate | 190 // to that trace. The code generator therefore has the ability to generate |
| 191 // code for each node several times. In order to limit the size of the | 191 // code for each node several times. In order to limit the size of the |
| 192 // generated code there is an arbitrary limit on how many specialized sets of | 192 // generated code there is an arbitrary limit on how many specialized sets of |
| 193 // code may be generated for a given node. If the limit is reached, the | 193 // code may be generated for a given node. If the limit is reached, the |
| 194 // trace is flushed and a generic version of the code for a node is emitted. | 194 // trace is flushed and a generic version of the code for a node is emitted. |
| 195 // This is subsequently used for that node. The code emitted for non-generic | 195 // This is subsequently used for that node. The code emitted for non-generic |
| 196 // trace is not recorded in the node and so it cannot currently be reused in | 196 // trace is not recorded in the node and so it cannot currently be reused in |
| 197 // the event that code generation is requested for an identical trace. | 197 // the event that code generation is requested for an identical trace. |
| 198 | 198 |
| 199 | |
| 200 void RegExpTree::AppendToText(RegExpText* text) { | 199 void RegExpTree::AppendToText(RegExpText* text) { |
| 201 UNREACHABLE(); | 200 UNREACHABLE(); |
| 202 } | 201 } |
| 203 | 202 |
| 204 | |
| 205 void RegExpAtom::AppendToText(RegExpText* text) { | 203 void RegExpAtom::AppendToText(RegExpText* text) { |
| 206 text->AddElement(TextElement::Atom(this)); | 204 text->AddElement(TextElement::Atom(this)); |
| 207 } | 205 } |
| 208 | 206 |
| 209 | |
| 210 void RegExpCharacterClass::AppendToText(RegExpText* text) { | 207 void RegExpCharacterClass::AppendToText(RegExpText* text) { |
| 211 text->AddElement(TextElement::CharClass(this)); | 208 text->AddElement(TextElement::CharClass(this)); |
| 212 } | 209 } |
| 213 | 210 |
| 214 | |
| 215 void RegExpText::AppendToText(RegExpText* text) { | 211 void RegExpText::AppendToText(RegExpText* text) { |
| 216 for (intptr_t i = 0; i < elements()->length(); i++) | 212 for (intptr_t i = 0; i < elements()->length(); i++) |
| 217 text->AddElement((*elements())[i]); | 213 text->AddElement((*elements())[i]); |
| 218 } | 214 } |
| 219 | 215 |
| 220 | |
| 221 TextElement TextElement::Atom(RegExpAtom* atom) { | 216 TextElement TextElement::Atom(RegExpAtom* atom) { |
| 222 return TextElement(ATOM, atom); | 217 return TextElement(ATOM, atom); |
| 223 } | 218 } |
| 224 | 219 |
| 225 | |
| 226 TextElement TextElement::CharClass(RegExpCharacterClass* char_class) { | 220 TextElement TextElement::CharClass(RegExpCharacterClass* char_class) { |
| 227 return TextElement(CHAR_CLASS, char_class); | 221 return TextElement(CHAR_CLASS, char_class); |
| 228 } | 222 } |
| 229 | 223 |
| 230 | |
| 231 intptr_t TextElement::length() const { | 224 intptr_t TextElement::length() const { |
| 232 switch (text_type()) { | 225 switch (text_type()) { |
| 233 case ATOM: | 226 case ATOM: |
| 234 return atom()->length(); | 227 return atom()->length(); |
| 235 | 228 |
| 236 case CHAR_CLASS: | 229 case CHAR_CLASS: |
| 237 return 1; | 230 return 1; |
| 238 } | 231 } |
| 239 UNREACHABLE(); | 232 UNREACHABLE(); |
| 240 return 0; | 233 return 0; |
| 241 } | 234 } |
| 242 | 235 |
| 243 | |
| 244 class FrequencyCollator : public ValueObject { | 236 class FrequencyCollator : public ValueObject { |
| 245 public: | 237 public: |
| 246 FrequencyCollator() : total_samples_(0) { | 238 FrequencyCollator() : total_samples_(0) { |
| 247 for (intptr_t i = 0; i < RegExpMacroAssembler::kTableSize; i++) { | 239 for (intptr_t i = 0; i < RegExpMacroAssembler::kTableSize; i++) { |
| 248 frequencies_[i] = CharacterFrequency(i); | 240 frequencies_[i] = CharacterFrequency(i); |
| 249 } | 241 } |
| 250 } | 242 } |
| 251 | 243 |
| 252 void CountCharacter(intptr_t character) { | 244 void CountCharacter(intptr_t character) { |
| 253 intptr_t index = (character & RegExpMacroAssembler::kTableMask); | 245 intptr_t index = (character & RegExpMacroAssembler::kTableMask); |
| (...skipping 22 matching lines...) Expand all Loading... |
| 276 intptr_t counter() { return counter_; } | 268 intptr_t counter() { return counter_; } |
| 277 intptr_t character() { return character_; } | 269 intptr_t character() { return character_; } |
| 278 | 270 |
| 279 private: | 271 private: |
| 280 intptr_t counter_; | 272 intptr_t counter_; |
| 281 intptr_t character_; | 273 intptr_t character_; |
| 282 | 274 |
| 283 DISALLOW_ALLOCATION(); | 275 DISALLOW_ALLOCATION(); |
| 284 }; | 276 }; |
| 285 | 277 |
| 286 | |
| 287 private: | 278 private: |
| 288 CharacterFrequency frequencies_[RegExpMacroAssembler::kTableSize]; | 279 CharacterFrequency frequencies_[RegExpMacroAssembler::kTableSize]; |
| 289 intptr_t total_samples_; | 280 intptr_t total_samples_; |
| 290 }; | 281 }; |
| 291 | 282 |
| 292 | |
| 293 class RegExpCompiler : public ValueObject { | 283 class RegExpCompiler : public ValueObject { |
| 294 public: | 284 public: |
| 295 RegExpCompiler(intptr_t capture_count, bool ignore_case, bool is_one_byte); | 285 RegExpCompiler(intptr_t capture_count, bool ignore_case, bool is_one_byte); |
| 296 | 286 |
| 297 intptr_t AllocateRegister() { return next_register_++; } | 287 intptr_t AllocateRegister() { return next_register_++; } |
| 298 | 288 |
| 299 #if !defined(DART_PRECOMPILED_RUNTIME) | 289 #if !defined(DART_PRECOMPILED_RUNTIME) |
| 300 RegExpEngine::CompilationResult Assemble(IRRegExpMacroAssembler* assembler, | 290 RegExpEngine::CompilationResult Assemble(IRRegExpMacroAssembler* assembler, |
| 301 RegExpNode* start, | 291 RegExpNode* start, |
| 302 intptr_t capture_count, | 292 intptr_t capture_count, |
| (...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 345 intptr_t recursion_depth_; | 335 intptr_t recursion_depth_; |
| 346 RegExpMacroAssembler* macro_assembler_; | 336 RegExpMacroAssembler* macro_assembler_; |
| 347 bool ignore_case_; | 337 bool ignore_case_; |
| 348 bool is_one_byte_; | 338 bool is_one_byte_; |
| 349 bool reg_exp_too_big_; | 339 bool reg_exp_too_big_; |
| 350 intptr_t current_expansion_factor_; | 340 intptr_t current_expansion_factor_; |
| 351 FrequencyCollator frequency_collator_; | 341 FrequencyCollator frequency_collator_; |
| 352 Zone* zone_; | 342 Zone* zone_; |
| 353 }; | 343 }; |
| 354 | 344 |
| 355 | |
| 356 class RecursionCheck : public ValueObject { | 345 class RecursionCheck : public ValueObject { |
| 357 public: | 346 public: |
| 358 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) { | 347 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) { |
| 359 compiler->IncrementRecursionDepth(); | 348 compiler->IncrementRecursionDepth(); |
| 360 } | 349 } |
| 361 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); } | 350 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); } |
| 362 | 351 |
| 363 private: | 352 private: |
| 364 RegExpCompiler* compiler_; | 353 RegExpCompiler* compiler_; |
| 365 }; | 354 }; |
| 366 | 355 |
| 367 | |
| 368 static RegExpEngine::CompilationResult IrregexpRegExpTooBig() { | 356 static RegExpEngine::CompilationResult IrregexpRegExpTooBig() { |
| 369 return RegExpEngine::CompilationResult("RegExp too big"); | 357 return RegExpEngine::CompilationResult("RegExp too big"); |
| 370 } | 358 } |
| 371 | 359 |
| 372 | |
| 373 // Attempts to compile the regexp using an Irregexp code generator. Returns | 360 // Attempts to compile the regexp using an Irregexp code generator. Returns |
| 374 // a fixed array or a null handle depending on whether it succeeded. | 361 // a fixed array or a null handle depending on whether it succeeded. |
| 375 RegExpCompiler::RegExpCompiler(intptr_t capture_count, | 362 RegExpCompiler::RegExpCompiler(intptr_t capture_count, |
| 376 bool ignore_case, | 363 bool ignore_case, |
| 377 bool is_one_byte) | 364 bool is_one_byte) |
| 378 : next_register_(2 * (capture_count + 1)), | 365 : next_register_(2 * (capture_count + 1)), |
| 379 work_list_(NULL), | 366 work_list_(NULL), |
| 380 recursion_depth_(0), | 367 recursion_depth_(0), |
| 381 ignore_case_(ignore_case), | 368 ignore_case_(ignore_case), |
| 382 is_one_byte_(is_one_byte), | 369 is_one_byte_(is_one_byte), |
| 383 reg_exp_too_big_(false), | 370 reg_exp_too_big_(false), |
| 384 current_expansion_factor_(1), | 371 current_expansion_factor_(1), |
| 385 zone_(Thread::Current()->zone()) { | 372 zone_(Thread::Current()->zone()) { |
| 386 accept_ = new (Z) EndNode(EndNode::ACCEPT, Z); | 373 accept_ = new (Z) EndNode(EndNode::ACCEPT, Z); |
| 387 } | 374 } |
| 388 | 375 |
| 389 | |
| 390 #if !defined(DART_PRECOMPILED_RUNTIME) | 376 #if !defined(DART_PRECOMPILED_RUNTIME) |
| 391 RegExpEngine::CompilationResult RegExpCompiler::Assemble( | 377 RegExpEngine::CompilationResult RegExpCompiler::Assemble( |
| 392 IRRegExpMacroAssembler* macro_assembler, | 378 IRRegExpMacroAssembler* macro_assembler, |
| 393 RegExpNode* start, | 379 RegExpNode* start, |
| 394 intptr_t capture_count, | 380 intptr_t capture_count, |
| 395 const String& pattern) { | 381 const String& pattern) { |
| 396 macro_assembler->set_slow_safe(false /* use_slow_safe_regexp_compiler */); | 382 macro_assembler->set_slow_safe(false /* use_slow_safe_regexp_compiler */); |
| 397 macro_assembler_ = macro_assembler; | 383 macro_assembler_ = macro_assembler; |
| 398 | 384 |
| 399 ZoneGrowableArray<RegExpNode*> work_list(0); | 385 ZoneGrowableArray<RegExpNode*> work_list(0); |
| (...skipping 12 matching lines...) Expand all Loading... |
| 412 macro_assembler->GenerateBacktrackBlock(); | 398 macro_assembler->GenerateBacktrackBlock(); |
| 413 macro_assembler->FinalizeRegistersArray(); | 399 macro_assembler->FinalizeRegistersArray(); |
| 414 | 400 |
| 415 return RegExpEngine::CompilationResult( | 401 return RegExpEngine::CompilationResult( |
| 416 macro_assembler->backtrack_goto(), macro_assembler->graph_entry(), | 402 macro_assembler->backtrack_goto(), macro_assembler->graph_entry(), |
| 417 macro_assembler->num_blocks(), macro_assembler->num_stack_locals(), | 403 macro_assembler->num_blocks(), macro_assembler->num_stack_locals(), |
| 418 next_register_); | 404 next_register_); |
| 419 } | 405 } |
| 420 #endif | 406 #endif |
| 421 | 407 |
| 422 | |
| 423 RegExpEngine::CompilationResult RegExpCompiler::Assemble( | 408 RegExpEngine::CompilationResult RegExpCompiler::Assemble( |
| 424 BytecodeRegExpMacroAssembler* macro_assembler, | 409 BytecodeRegExpMacroAssembler* macro_assembler, |
| 425 RegExpNode* start, | 410 RegExpNode* start, |
| 426 intptr_t capture_count, | 411 intptr_t capture_count, |
| 427 const String& pattern) { | 412 const String& pattern) { |
| 428 macro_assembler->set_slow_safe(false /* use_slow_safe_regexp_compiler */); | 413 macro_assembler->set_slow_safe(false /* use_slow_safe_regexp_compiler */); |
| 429 macro_assembler_ = macro_assembler; | 414 macro_assembler_ = macro_assembler; |
| 430 | 415 |
| 431 ZoneGrowableArray<RegExpNode*> work_list(0); | 416 ZoneGrowableArray<RegExpNode*> work_list(0); |
| 432 work_list_ = &work_list; | 417 work_list_ = &work_list; |
| 433 BlockLabel fail; | 418 BlockLabel fail; |
| 434 macro_assembler_->PushBacktrack(&fail); | 419 macro_assembler_->PushBacktrack(&fail); |
| 435 Trace new_trace; | 420 Trace new_trace; |
| 436 start->Emit(this, &new_trace); | 421 start->Emit(this, &new_trace); |
| 437 macro_assembler_->BindBlock(&fail); | 422 macro_assembler_->BindBlock(&fail); |
| 438 macro_assembler_->Fail(); | 423 macro_assembler_->Fail(); |
| 439 while (!work_list.is_empty()) { | 424 while (!work_list.is_empty()) { |
| 440 work_list.RemoveLast()->Emit(this, &new_trace); | 425 work_list.RemoveLast()->Emit(this, &new_trace); |
| 441 } | 426 } |
| 442 if (reg_exp_too_big_) return IrregexpRegExpTooBig(); | 427 if (reg_exp_too_big_) return IrregexpRegExpTooBig(); |
| 443 | 428 |
| 444 TypedData& bytecode = TypedData::ZoneHandle(macro_assembler->GetBytecode()); | 429 TypedData& bytecode = TypedData::ZoneHandle(macro_assembler->GetBytecode()); |
| 445 return RegExpEngine::CompilationResult(&bytecode, next_register_); | 430 return RegExpEngine::CompilationResult(&bytecode, next_register_); |
| 446 } | 431 } |
| 447 | 432 |
| 448 | |
| 449 bool Trace::DeferredAction::Mentions(intptr_t that) { | 433 bool Trace::DeferredAction::Mentions(intptr_t that) { |
| 450 if (action_type() == ActionNode::CLEAR_CAPTURES) { | 434 if (action_type() == ActionNode::CLEAR_CAPTURES) { |
| 451 Interval range = static_cast<DeferredClearCaptures*>(this)->range(); | 435 Interval range = static_cast<DeferredClearCaptures*>(this)->range(); |
| 452 return range.Contains(that); | 436 return range.Contains(that); |
| 453 } else { | 437 } else { |
| 454 return reg() == that; | 438 return reg() == that; |
| 455 } | 439 } |
| 456 } | 440 } |
| 457 | 441 |
| 458 | |
| 459 bool Trace::mentions_reg(intptr_t reg) { | 442 bool Trace::mentions_reg(intptr_t reg) { |
| 460 for (DeferredAction* action = actions_; action != NULL; | 443 for (DeferredAction* action = actions_; action != NULL; |
| 461 action = action->next()) { | 444 action = action->next()) { |
| 462 if (action->Mentions(reg)) return true; | 445 if (action->Mentions(reg)) return true; |
| 463 } | 446 } |
| 464 return false; | 447 return false; |
| 465 } | 448 } |
| 466 | 449 |
| 467 | |
| 468 bool Trace::GetStoredPosition(intptr_t reg, intptr_t* cp_offset) { | 450 bool Trace::GetStoredPosition(intptr_t reg, intptr_t* cp_offset) { |
| 469 ASSERT(*cp_offset == 0); | 451 ASSERT(*cp_offset == 0); |
| 470 for (DeferredAction* action = actions_; action != NULL; | 452 for (DeferredAction* action = actions_; action != NULL; |
| 471 action = action->next()) { | 453 action = action->next()) { |
| 472 if (action->Mentions(reg)) { | 454 if (action->Mentions(reg)) { |
| 473 if (action->action_type() == ActionNode::STORE_POSITION) { | 455 if (action->action_type() == ActionNode::STORE_POSITION) { |
| 474 *cp_offset = static_cast<DeferredCapture*>(action)->cp_offset(); | 456 *cp_offset = static_cast<DeferredCapture*>(action)->cp_offset(); |
| 475 return true; | 457 return true; |
| 476 } else { | 458 } else { |
| 477 return false; | 459 return false; |
| 478 } | 460 } |
| 479 } | 461 } |
| 480 } | 462 } |
| 481 return false; | 463 return false; |
| 482 } | 464 } |
| 483 | 465 |
| 484 | |
| 485 // This is called as we come into a loop choice node and some other tricky | 466 // This is called as we come into a loop choice node and some other tricky |
| 486 // nodes. It normalizes the state of the code generator to ensure we can | 467 // nodes. It normalizes the state of the code generator to ensure we can |
| 487 // generate generic code. | 468 // generate generic code. |
| 488 intptr_t Trace::FindAffectedRegisters(OutSet* affected_registers, Zone* zone) { | 469 intptr_t Trace::FindAffectedRegisters(OutSet* affected_registers, Zone* zone) { |
| 489 intptr_t max_register = RegExpCompiler::kNoRegister; | 470 intptr_t max_register = RegExpCompiler::kNoRegister; |
| 490 for (DeferredAction* action = actions_; action != NULL; | 471 for (DeferredAction* action = actions_; action != NULL; |
| 491 action = action->next()) { | 472 action = action->next()) { |
| 492 if (action->action_type() == ActionNode::CLEAR_CAPTURES) { | 473 if (action->action_type() == ActionNode::CLEAR_CAPTURES) { |
| 493 Interval range = static_cast<DeferredClearCaptures*>(action)->range(); | 474 Interval range = static_cast<DeferredClearCaptures*>(action)->range(); |
| 494 for (intptr_t i = range.from(); i <= range.to(); i++) | 475 for (intptr_t i = range.from(); i <= range.to(); i++) |
| 495 affected_registers->Set(i, zone); | 476 affected_registers->Set(i, zone); |
| 496 if (range.to() > max_register) max_register = range.to(); | 477 if (range.to() > max_register) max_register = range.to(); |
| 497 } else { | 478 } else { |
| 498 affected_registers->Set(action->reg(), zone); | 479 affected_registers->Set(action->reg(), zone); |
| 499 if (action->reg() > max_register) max_register = action->reg(); | 480 if (action->reg() > max_register) max_register = action->reg(); |
| 500 } | 481 } |
| 501 } | 482 } |
| 502 return max_register; | 483 return max_register; |
| 503 } | 484 } |
| 504 | 485 |
| 505 | |
| 506 void Trace::RestoreAffectedRegisters(RegExpMacroAssembler* assembler, | 486 void Trace::RestoreAffectedRegisters(RegExpMacroAssembler* assembler, |
| 507 intptr_t max_register, | 487 intptr_t max_register, |
| 508 const OutSet& registers_to_pop, | 488 const OutSet& registers_to_pop, |
| 509 const OutSet& registers_to_clear) { | 489 const OutSet& registers_to_clear) { |
| 510 for (intptr_t reg = max_register; reg >= 0; reg--) { | 490 for (intptr_t reg = max_register; reg >= 0; reg--) { |
| 511 if (registers_to_pop.Get(reg)) { | 491 if (registers_to_pop.Get(reg)) { |
| 512 assembler->PopRegister(reg); | 492 assembler->PopRegister(reg); |
| 513 } else if (registers_to_clear.Get(reg)) { | 493 } else if (registers_to_clear.Get(reg)) { |
| 514 intptr_t clear_to = reg; | 494 intptr_t clear_to = reg; |
| 515 while (reg > 0 && registers_to_clear.Get(reg - 1)) { | 495 while (reg > 0 && registers_to_clear.Get(reg - 1)) { |
| 516 reg--; | 496 reg--; |
| 517 } | 497 } |
| 518 assembler->ClearRegisters(reg, clear_to); | 498 assembler->ClearRegisters(reg, clear_to); |
| 519 } | 499 } |
| 520 } | 500 } |
| 521 } | 501 } |
| 522 | 502 |
| 523 | |
| 524 void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler, | 503 void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler, |
| 525 intptr_t max_register, | 504 intptr_t max_register, |
| 526 const OutSet& affected_registers, | 505 const OutSet& affected_registers, |
| 527 OutSet* registers_to_pop, | 506 OutSet* registers_to_pop, |
| 528 OutSet* registers_to_clear, | 507 OutSet* registers_to_clear, |
| 529 Zone* zone) { | 508 Zone* zone) { |
| 530 for (intptr_t reg = 0; reg <= max_register; reg++) { | 509 for (intptr_t reg = 0; reg <= max_register; reg++) { |
| 531 if (!affected_registers.Get(reg)) { | 510 if (!affected_registers.Get(reg)) { |
| 532 continue; | 511 continue; |
| 533 } | 512 } |
| (...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 628 } else if (clear) { | 607 } else if (clear) { |
| 629 assembler->ClearRegisters(reg, reg); | 608 assembler->ClearRegisters(reg, reg); |
| 630 } else if (absolute) { | 609 } else if (absolute) { |
| 631 assembler->SetRegister(reg, value); | 610 assembler->SetRegister(reg, value); |
| 632 } else if (value != 0) { | 611 } else if (value != 0) { |
| 633 assembler->AdvanceRegister(reg, value); | 612 assembler->AdvanceRegister(reg, value); |
| 634 } | 613 } |
| 635 } | 614 } |
| 636 } | 615 } |
| 637 | 616 |
| 638 | |
| 639 // This is called as we come into a loop choice node and some other tricky | 617 // This is called as we come into a loop choice node and some other tricky |
| 640 // nodes. It normalizes the state of the code generator to ensure we can | 618 // nodes. It normalizes the state of the code generator to ensure we can |
| 641 // generate generic code. | 619 // generate generic code. |
| 642 void Trace::Flush(RegExpCompiler* compiler, RegExpNode* successor) { | 620 void Trace::Flush(RegExpCompiler* compiler, RegExpNode* successor) { |
| 643 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 621 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
| 644 | 622 |
| 645 ASSERT(!is_trivial()); | 623 ASSERT(!is_trivial()); |
| 646 | 624 |
| 647 if (actions_ == NULL && backtrack() == NULL) { | 625 if (actions_ == NULL && backtrack() == NULL) { |
| 648 // Here we just have some deferred cp advances to fix and we are back to | 626 // Here we just have some deferred cp advances to fix and we are back to |
| (...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 685 RestoreAffectedRegisters(assembler, max_register, registers_to_pop, | 663 RestoreAffectedRegisters(assembler, max_register, registers_to_pop, |
| 686 registers_to_clear); | 664 registers_to_clear); |
| 687 if (backtrack() == NULL) { | 665 if (backtrack() == NULL) { |
| 688 assembler->Backtrack(); | 666 assembler->Backtrack(); |
| 689 } else { | 667 } else { |
| 690 assembler->PopCurrentPosition(); | 668 assembler->PopCurrentPosition(); |
| 691 assembler->GoTo(backtrack()); | 669 assembler->GoTo(backtrack()); |
| 692 } | 670 } |
| 693 } | 671 } |
| 694 | 672 |
| 695 | |
| 696 void NegativeSubmatchSuccess::Emit(RegExpCompiler* compiler, Trace* trace) { | 673 void NegativeSubmatchSuccess::Emit(RegExpCompiler* compiler, Trace* trace) { |
| 697 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 674 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
| 698 | 675 |
| 699 // Omit flushing the trace. We discard the entire stack frame anyway. | 676 // Omit flushing the trace. We discard the entire stack frame anyway. |
| 700 | 677 |
| 701 if (!label()->IsBound()) { | 678 if (!label()->IsBound()) { |
| 702 // We are completely independent of the trace, since we ignore it, | 679 // We are completely independent of the trace, since we ignore it, |
| 703 // so this code can be used as the generic version. | 680 // so this code can be used as the generic version. |
| 704 assembler->BindBlock(label()); | 681 assembler->BindBlock(label()); |
| 705 } | 682 } |
| 706 | 683 |
| 707 // Throw away everything on the backtrack stack since the start | 684 // Throw away everything on the backtrack stack since the start |
| 708 // of the negative submatch and restore the character position. | 685 // of the negative submatch and restore the character position. |
| 709 assembler->ReadCurrentPositionFromRegister(current_position_register_); | 686 assembler->ReadCurrentPositionFromRegister(current_position_register_); |
| 710 assembler->ReadStackPointerFromRegister(stack_pointer_register_); | 687 assembler->ReadStackPointerFromRegister(stack_pointer_register_); |
| 711 if (clear_capture_count_ > 0) { | 688 if (clear_capture_count_ > 0) { |
| 712 // Clear any captures that might have been performed during the success | 689 // Clear any captures that might have been performed during the success |
| 713 // of the body of the negative look-ahead. | 690 // of the body of the negative look-ahead. |
| 714 int clear_capture_end = clear_capture_start_ + clear_capture_count_ - 1; | 691 int clear_capture_end = clear_capture_start_ + clear_capture_count_ - 1; |
| 715 assembler->ClearRegisters(clear_capture_start_, clear_capture_end); | 692 assembler->ClearRegisters(clear_capture_start_, clear_capture_end); |
| 716 } | 693 } |
| 717 // Now that we have unwound the stack we find at the top of the stack the | 694 // Now that we have unwound the stack we find at the top of the stack the |
| 718 // backtrack that the BeginSubmatch node got. | 695 // backtrack that the BeginSubmatch node got. |
| 719 assembler->Backtrack(); | 696 assembler->Backtrack(); |
| 720 } | 697 } |
| 721 | 698 |
| 722 | |
| 723 void EndNode::Emit(RegExpCompiler* compiler, Trace* trace) { | 699 void EndNode::Emit(RegExpCompiler* compiler, Trace* trace) { |
| 724 if (!trace->is_trivial()) { | 700 if (!trace->is_trivial()) { |
| 725 trace->Flush(compiler, this); | 701 trace->Flush(compiler, this); |
| 726 return; | 702 return; |
| 727 } | 703 } |
| 728 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 704 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
| 729 if (!label()->IsBound()) { | 705 if (!label()->IsBound()) { |
| 730 assembler->BindBlock(label()); | 706 assembler->BindBlock(label()); |
| 731 } | 707 } |
| 732 switch (action_) { | 708 switch (action_) { |
| 733 case ACCEPT: | 709 case ACCEPT: |
| 734 assembler->Succeed(); | 710 assembler->Succeed(); |
| 735 return; | 711 return; |
| 736 case BACKTRACK: | 712 case BACKTRACK: |
| 737 assembler->GoTo(trace->backtrack()); | 713 assembler->GoTo(trace->backtrack()); |
| 738 return; | 714 return; |
| 739 case NEGATIVE_SUBMATCH_SUCCESS: | 715 case NEGATIVE_SUBMATCH_SUCCESS: |
| 740 // This case is handled in a different virtual method. | 716 // This case is handled in a different virtual method. |
| 741 UNREACHABLE(); | 717 UNREACHABLE(); |
| 742 } | 718 } |
| 743 UNIMPLEMENTED(); | 719 UNIMPLEMENTED(); |
| 744 } | 720 } |
| 745 | 721 |
| 746 | |
| 747 void GuardedAlternative::AddGuard(Guard* guard, Zone* zone) { | 722 void GuardedAlternative::AddGuard(Guard* guard, Zone* zone) { |
| 748 if (guards_ == NULL) guards_ = new (zone) ZoneGrowableArray<Guard*>(1); | 723 if (guards_ == NULL) guards_ = new (zone) ZoneGrowableArray<Guard*>(1); |
| 749 guards_->Add(guard); | 724 guards_->Add(guard); |
| 750 } | 725 } |
| 751 | 726 |
| 752 | |
| 753 ActionNode* ActionNode::SetRegister(intptr_t reg, | 727 ActionNode* ActionNode::SetRegister(intptr_t reg, |
| 754 intptr_t val, | 728 intptr_t val, |
| 755 RegExpNode* on_success) { | 729 RegExpNode* on_success) { |
| 756 ActionNode* result = | 730 ActionNode* result = |
| 757 new (on_success->zone()) ActionNode(SET_REGISTER, on_success); | 731 new (on_success->zone()) ActionNode(SET_REGISTER, on_success); |
| 758 result->data_.u_store_register.reg = reg; | 732 result->data_.u_store_register.reg = reg; |
| 759 result->data_.u_store_register.value = val; | 733 result->data_.u_store_register.value = val; |
| 760 return result; | 734 return result; |
| 761 } | 735 } |
| 762 | 736 |
| 763 | |
| 764 ActionNode* ActionNode::IncrementRegister(intptr_t reg, | 737 ActionNode* ActionNode::IncrementRegister(intptr_t reg, |
| 765 RegExpNode* on_success) { | 738 RegExpNode* on_success) { |
| 766 ActionNode* result = | 739 ActionNode* result = |
| 767 new (on_success->zone()) ActionNode(INCREMENT_REGISTER, on_success); | 740 new (on_success->zone()) ActionNode(INCREMENT_REGISTER, on_success); |
| 768 result->data_.u_increment_register.reg = reg; | 741 result->data_.u_increment_register.reg = reg; |
| 769 return result; | 742 return result; |
| 770 } | 743 } |
| 771 | 744 |
| 772 | |
| 773 ActionNode* ActionNode::StorePosition(intptr_t reg, | 745 ActionNode* ActionNode::StorePosition(intptr_t reg, |
| 774 bool is_capture, | 746 bool is_capture, |
| 775 RegExpNode* on_success) { | 747 RegExpNode* on_success) { |
| 776 ActionNode* result = | 748 ActionNode* result = |
| 777 new (on_success->zone()) ActionNode(STORE_POSITION, on_success); | 749 new (on_success->zone()) ActionNode(STORE_POSITION, on_success); |
| 778 result->data_.u_position_register.reg = reg; | 750 result->data_.u_position_register.reg = reg; |
| 779 result->data_.u_position_register.is_capture = is_capture; | 751 result->data_.u_position_register.is_capture = is_capture; |
| 780 return result; | 752 return result; |
| 781 } | 753 } |
| 782 | 754 |
| 783 | |
| 784 ActionNode* ActionNode::ClearCaptures(Interval range, RegExpNode* on_success) { | 755 ActionNode* ActionNode::ClearCaptures(Interval range, RegExpNode* on_success) { |
| 785 ActionNode* result = | 756 ActionNode* result = |
| 786 new (on_success->zone()) ActionNode(CLEAR_CAPTURES, on_success); | 757 new (on_success->zone()) ActionNode(CLEAR_CAPTURES, on_success); |
| 787 result->data_.u_clear_captures.range_from = range.from(); | 758 result->data_.u_clear_captures.range_from = range.from(); |
| 788 result->data_.u_clear_captures.range_to = range.to(); | 759 result->data_.u_clear_captures.range_to = range.to(); |
| 789 return result; | 760 return result; |
| 790 } | 761 } |
| 791 | 762 |
| 792 | |
| 793 ActionNode* ActionNode::BeginSubmatch(intptr_t stack_reg, | 763 ActionNode* ActionNode::BeginSubmatch(intptr_t stack_reg, |
| 794 intptr_t position_reg, | 764 intptr_t position_reg, |
| 795 RegExpNode* on_success) { | 765 RegExpNode* on_success) { |
| 796 ActionNode* result = | 766 ActionNode* result = |
| 797 new (on_success->zone()) ActionNode(BEGIN_SUBMATCH, on_success); | 767 new (on_success->zone()) ActionNode(BEGIN_SUBMATCH, on_success); |
| 798 result->data_.u_submatch.stack_pointer_register = stack_reg; | 768 result->data_.u_submatch.stack_pointer_register = stack_reg; |
| 799 result->data_.u_submatch.current_position_register = position_reg; | 769 result->data_.u_submatch.current_position_register = position_reg; |
| 800 return result; | 770 return result; |
| 801 } | 771 } |
| 802 | 772 |
| 803 | |
| 804 ActionNode* ActionNode::PositiveSubmatchSuccess(intptr_t stack_reg, | 773 ActionNode* ActionNode::PositiveSubmatchSuccess(intptr_t stack_reg, |
| 805 intptr_t position_reg, | 774 intptr_t position_reg, |
| 806 intptr_t clear_register_count, | 775 intptr_t clear_register_count, |
| 807 intptr_t clear_register_from, | 776 intptr_t clear_register_from, |
| 808 RegExpNode* on_success) { | 777 RegExpNode* on_success) { |
| 809 ActionNode* result = new (on_success->zone()) | 778 ActionNode* result = new (on_success->zone()) |
| 810 ActionNode(POSITIVE_SUBMATCH_SUCCESS, on_success); | 779 ActionNode(POSITIVE_SUBMATCH_SUCCESS, on_success); |
| 811 result->data_.u_submatch.stack_pointer_register = stack_reg; | 780 result->data_.u_submatch.stack_pointer_register = stack_reg; |
| 812 result->data_.u_submatch.current_position_register = position_reg; | 781 result->data_.u_submatch.current_position_register = position_reg; |
| 813 result->data_.u_submatch.clear_register_count = clear_register_count; | 782 result->data_.u_submatch.clear_register_count = clear_register_count; |
| 814 result->data_.u_submatch.clear_register_from = clear_register_from; | 783 result->data_.u_submatch.clear_register_from = clear_register_from; |
| 815 return result; | 784 return result; |
| 816 } | 785 } |
| 817 | 786 |
| 818 | |
| 819 ActionNode* ActionNode::EmptyMatchCheck(intptr_t start_register, | 787 ActionNode* ActionNode::EmptyMatchCheck(intptr_t start_register, |
| 820 intptr_t repetition_register, | 788 intptr_t repetition_register, |
| 821 intptr_t repetition_limit, | 789 intptr_t repetition_limit, |
| 822 RegExpNode* on_success) { | 790 RegExpNode* on_success) { |
| 823 ActionNode* result = | 791 ActionNode* result = |
| 824 new (on_success->zone()) ActionNode(EMPTY_MATCH_CHECK, on_success); | 792 new (on_success->zone()) ActionNode(EMPTY_MATCH_CHECK, on_success); |
| 825 result->data_.u_empty_match_check.start_register = start_register; | 793 result->data_.u_empty_match_check.start_register = start_register; |
| 826 result->data_.u_empty_match_check.repetition_register = repetition_register; | 794 result->data_.u_empty_match_check.repetition_register = repetition_register; |
| 827 result->data_.u_empty_match_check.repetition_limit = repetition_limit; | 795 result->data_.u_empty_match_check.repetition_limit = repetition_limit; |
| 828 return result; | 796 return result; |
| 829 } | 797 } |
| 830 | 798 |
| 831 | |
| 832 #define DEFINE_ACCEPT(Type) \ | 799 #define DEFINE_ACCEPT(Type) \ |
| 833 void Type##Node::Accept(NodeVisitor* visitor) { visitor->Visit##Type(this); } | 800 void Type##Node::Accept(NodeVisitor* visitor) { visitor->Visit##Type(this); } |
| 834 FOR_EACH_NODE_TYPE(DEFINE_ACCEPT) | 801 FOR_EACH_NODE_TYPE(DEFINE_ACCEPT) |
| 835 #undef DEFINE_ACCEPT | 802 #undef DEFINE_ACCEPT |
| 836 | 803 |
| 837 | |
| 838 void LoopChoiceNode::Accept(NodeVisitor* visitor) { | 804 void LoopChoiceNode::Accept(NodeVisitor* visitor) { |
| 839 visitor->VisitLoopChoice(this); | 805 visitor->VisitLoopChoice(this); |
| 840 } | 806 } |
| 841 | 807 |
| 842 | |
| 843 // ------------------------------------------------------------------- | 808 // ------------------------------------------------------------------- |
| 844 // Emit code. | 809 // Emit code. |
| 845 | 810 |
| 846 | |
| 847 void ChoiceNode::GenerateGuard(RegExpMacroAssembler* macro_assembler, | 811 void ChoiceNode::GenerateGuard(RegExpMacroAssembler* macro_assembler, |
| 848 Guard* guard, | 812 Guard* guard, |
| 849 Trace* trace) { | 813 Trace* trace) { |
| 850 switch (guard->op()) { | 814 switch (guard->op()) { |
| 851 case Guard::LT: | 815 case Guard::LT: |
| 852 ASSERT(!trace->mentions_reg(guard->reg())); | 816 ASSERT(!trace->mentions_reg(guard->reg())); |
| 853 macro_assembler->IfRegisterGE(guard->reg(), guard->value(), | 817 macro_assembler->IfRegisterGE(guard->reg(), guard->value(), |
| 854 trace->backtrack()); | 818 trace->backtrack()); |
| 855 break; | 819 break; |
| 856 case Guard::GEQ: | 820 case Guard::GEQ: |
| 857 ASSERT(!trace->mentions_reg(guard->reg())); | 821 ASSERT(!trace->mentions_reg(guard->reg())); |
| 858 macro_assembler->IfRegisterLT(guard->reg(), guard->value(), | 822 macro_assembler->IfRegisterLT(guard->reg(), guard->value(), |
| 859 trace->backtrack()); | 823 trace->backtrack()); |
| 860 break; | 824 break; |
| 861 } | 825 } |
| 862 } | 826 } |
| 863 | 827 |
| 864 | |
| 865 // Returns the number of characters in the equivalence class, omitting those | 828 // Returns the number of characters in the equivalence class, omitting those |
| 866 // that cannot occur in the source string because it is ASCII. | 829 // that cannot occur in the source string because it is ASCII. |
| 867 static intptr_t GetCaseIndependentLetters(uint16_t character, | 830 static intptr_t GetCaseIndependentLetters(uint16_t character, |
| 868 bool one_byte_subject, | 831 bool one_byte_subject, |
| 869 int32_t* letters) { | 832 int32_t* letters) { |
| 870 unibrow::Mapping<unibrow::Ecma262UnCanonicalize> jsregexp_uncanonicalize; | 833 unibrow::Mapping<unibrow::Ecma262UnCanonicalize> jsregexp_uncanonicalize; |
| 871 intptr_t length = jsregexp_uncanonicalize.get(character, '\0', letters); | 834 intptr_t length = jsregexp_uncanonicalize.get(character, '\0', letters); |
| 872 // Unibrow returns 0 or 1 for characters where case independence is | 835 // Unibrow returns 0 or 1 for characters where case independence is |
| 873 // trivial. | 836 // trivial. |
| 874 if (length == 0) { | 837 if (length == 0) { |
| 875 letters[0] = character; | 838 letters[0] = character; |
| 876 length = 1; | 839 length = 1; |
| 877 } | 840 } |
| 878 if (!one_byte_subject || character <= Symbols::kMaxOneCharCodeSymbol) { | 841 if (!one_byte_subject || character <= Symbols::kMaxOneCharCodeSymbol) { |
| 879 return length; | 842 return length; |
| 880 } | 843 } |
| 881 | 844 |
| 882 // The standard requires that non-ASCII characters cannot have ASCII | 845 // The standard requires that non-ASCII characters cannot have ASCII |
| 883 // character codes in their equivalence class. | 846 // character codes in their equivalence class. |
| 884 // TODO(dcarney): issue 3550 this is not actually true for Latin1 anymore, | 847 // TODO(dcarney): issue 3550 this is not actually true for Latin1 anymore, |
| 885 // is it? For example, \u00C5 is equivalent to \u212B. | 848 // is it? For example, \u00C5 is equivalent to \u212B. |
| 886 return 0; | 849 return 0; |
| 887 } | 850 } |
| 888 | 851 |
| 889 | |
| 890 static inline bool EmitSimpleCharacter(Zone* zone, | 852 static inline bool EmitSimpleCharacter(Zone* zone, |
| 891 RegExpCompiler* compiler, | 853 RegExpCompiler* compiler, |
| 892 uint16_t c, | 854 uint16_t c, |
| 893 BlockLabel* on_failure, | 855 BlockLabel* on_failure, |
| 894 intptr_t cp_offset, | 856 intptr_t cp_offset, |
| 895 bool check, | 857 bool check, |
| 896 bool preloaded) { | 858 bool preloaded) { |
| 897 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 859 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
| 898 bool bound_checked = false; | 860 bool bound_checked = false; |
| 899 if (!preloaded) { | 861 if (!preloaded) { |
| 900 assembler->LoadCurrentCharacter(cp_offset, on_failure, check); | 862 assembler->LoadCurrentCharacter(cp_offset, on_failure, check); |
| 901 bound_checked = true; | 863 bound_checked = true; |
| 902 } | 864 } |
| 903 assembler->CheckNotCharacter(c, on_failure); | 865 assembler->CheckNotCharacter(c, on_failure); |
| 904 return bound_checked; | 866 return bound_checked; |
| 905 } | 867 } |
| 906 | 868 |
| 907 | |
| 908 // Only emits non-letters (things that don't have case). Only used for case | 869 // Only emits non-letters (things that don't have case). Only used for case |
| 909 // independent matches. | 870 // independent matches. |
| 910 static inline bool EmitAtomNonLetter(Zone* zone, | 871 static inline bool EmitAtomNonLetter(Zone* zone, |
| 911 RegExpCompiler* compiler, | 872 RegExpCompiler* compiler, |
| 912 uint16_t c, | 873 uint16_t c, |
| 913 BlockLabel* on_failure, | 874 BlockLabel* on_failure, |
| 914 intptr_t cp_offset, | 875 intptr_t cp_offset, |
| 915 bool check, | 876 bool check, |
| 916 bool preloaded) { | 877 bool preloaded) { |
| 917 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); | 878 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); |
| (...skipping 15 matching lines...) Expand all Loading... |
| 933 } | 894 } |
| 934 if (!preloaded) { | 895 if (!preloaded) { |
| 935 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check); | 896 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check); |
| 936 checked = check; | 897 checked = check; |
| 937 } | 898 } |
| 938 macro_assembler->CheckNotCharacter(c, on_failure); | 899 macro_assembler->CheckNotCharacter(c, on_failure); |
| 939 } | 900 } |
| 940 return checked; | 901 return checked; |
| 941 } | 902 } |
| 942 | 903 |
| 943 | |
| 944 static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler, | 904 static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler, |
| 945 bool one_byte, | 905 bool one_byte, |
| 946 uint16_t c1, | 906 uint16_t c1, |
| 947 uint16_t c2, | 907 uint16_t c2, |
| 948 BlockLabel* on_failure) { | 908 BlockLabel* on_failure) { |
| 949 uint16_t char_mask; | 909 uint16_t char_mask; |
| 950 if (one_byte) { | 910 if (one_byte) { |
| 951 char_mask = Symbols::kMaxOneCharCodeSymbol; | 911 char_mask = Symbols::kMaxOneCharCodeSymbol; |
| 952 } else { | 912 } else { |
| 953 char_mask = Utf16::kMaxCodeUnit; | 913 char_mask = Utf16::kMaxCodeUnit; |
| (...skipping 16 matching lines...) Expand all Loading... |
| 970 // trick. We avoid the theoretical case where negative numbers are | 930 // trick. We avoid the theoretical case where negative numbers are |
| 971 // involved in order to simplify code generation. | 931 // involved in order to simplify code generation. |
| 972 uint16_t mask = char_mask ^ diff; | 932 uint16_t mask = char_mask ^ diff; |
| 973 macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff, diff, mask, | 933 macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff, diff, mask, |
| 974 on_failure); | 934 on_failure); |
| 975 return true; | 935 return true; |
| 976 } | 936 } |
| 977 return false; | 937 return false; |
| 978 } | 938 } |
| 979 | 939 |
| 980 | |
| 981 typedef bool EmitCharacterFunction(Zone* zone, | 940 typedef bool EmitCharacterFunction(Zone* zone, |
| 982 RegExpCompiler* compiler, | 941 RegExpCompiler* compiler, |
| 983 uint16_t c, | 942 uint16_t c, |
| 984 BlockLabel* on_failure, | 943 BlockLabel* on_failure, |
| 985 intptr_t cp_offset, | 944 intptr_t cp_offset, |
| 986 bool check, | 945 bool check, |
| 987 bool preloaded); | 946 bool preloaded); |
| 988 | 947 |
| 989 // Only emits letters (things that have case). Only used for case independent | 948 // Only emits letters (things that have case). Only used for case independent |
| 990 // matches. | 949 // matches. |
| (...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1027 macro_assembler->CheckNotCharacter(chars[2], on_failure); | 986 macro_assembler->CheckNotCharacter(chars[2], on_failure); |
| 1028 macro_assembler->BindBlock(&ok); | 987 macro_assembler->BindBlock(&ok); |
| 1029 break; | 988 break; |
| 1030 default: | 989 default: |
| 1031 UNREACHABLE(); | 990 UNREACHABLE(); |
| 1032 break; | 991 break; |
| 1033 } | 992 } |
| 1034 return true; | 993 return true; |
| 1035 } | 994 } |
| 1036 | 995 |
| 1037 | |
| 1038 static void EmitBoundaryTest(RegExpMacroAssembler* masm, | 996 static void EmitBoundaryTest(RegExpMacroAssembler* masm, |
| 1039 intptr_t border, | 997 intptr_t border, |
| 1040 BlockLabel* fall_through, | 998 BlockLabel* fall_through, |
| 1041 BlockLabel* above_or_equal, | 999 BlockLabel* above_or_equal, |
| 1042 BlockLabel* below) { | 1000 BlockLabel* below) { |
| 1043 if (below != fall_through) { | 1001 if (below != fall_through) { |
| 1044 masm->CheckCharacterLT(border, below); | 1002 masm->CheckCharacterLT(border, below); |
| 1045 if (above_or_equal != fall_through) masm->GoTo(above_or_equal); | 1003 if (above_or_equal != fall_through) masm->GoTo(above_or_equal); |
| 1046 } else { | 1004 } else { |
| 1047 masm->CheckCharacterGT(border - 1, above_or_equal); | 1005 masm->CheckCharacterGT(border - 1, above_or_equal); |
| 1048 } | 1006 } |
| 1049 } | 1007 } |
| 1050 | 1008 |
| 1051 | |
| 1052 static void EmitDoubleBoundaryTest(RegExpMacroAssembler* masm, | 1009 static void EmitDoubleBoundaryTest(RegExpMacroAssembler* masm, |
| 1053 intptr_t first, | 1010 intptr_t first, |
| 1054 intptr_t last, | 1011 intptr_t last, |
| 1055 BlockLabel* fall_through, | 1012 BlockLabel* fall_through, |
| 1056 BlockLabel* in_range, | 1013 BlockLabel* in_range, |
| 1057 BlockLabel* out_of_range) { | 1014 BlockLabel* out_of_range) { |
| 1058 if (in_range == fall_through) { | 1015 if (in_range == fall_through) { |
| 1059 if (first == last) { | 1016 if (first == last) { |
| 1060 masm->CheckNotCharacter(first, out_of_range); | 1017 masm->CheckNotCharacter(first, out_of_range); |
| 1061 } else { | 1018 } else { |
| 1062 masm->CheckCharacterNotInRange(first, last, out_of_range); | 1019 masm->CheckCharacterNotInRange(first, last, out_of_range); |
| 1063 } | 1020 } |
| 1064 } else { | 1021 } else { |
| 1065 if (first == last) { | 1022 if (first == last) { |
| 1066 masm->CheckCharacter(first, in_range); | 1023 masm->CheckCharacter(first, in_range); |
| 1067 } else { | 1024 } else { |
| 1068 masm->CheckCharacterInRange(first, last, in_range); | 1025 masm->CheckCharacterInRange(first, last, in_range); |
| 1069 } | 1026 } |
| 1070 if (out_of_range != fall_through) masm->GoTo(out_of_range); | 1027 if (out_of_range != fall_through) masm->GoTo(out_of_range); |
| 1071 } | 1028 } |
| 1072 } | 1029 } |
| 1073 | 1030 |
| 1074 | |
| 1075 // even_label is for ranges[i] to ranges[i + 1] where i - start_index is even. | 1031 // even_label is for ranges[i] to ranges[i + 1] where i - start_index is even. |
| 1076 // odd_label is for ranges[i] to ranges[i + 1] where i - start_index is odd. | 1032 // odd_label is for ranges[i] to ranges[i + 1] where i - start_index is odd. |
| 1077 static void EmitUseLookupTable(RegExpMacroAssembler* masm, | 1033 static void EmitUseLookupTable(RegExpMacroAssembler* masm, |
| 1078 ZoneGrowableArray<int>* ranges, | 1034 ZoneGrowableArray<int>* ranges, |
| 1079 intptr_t start_index, | 1035 intptr_t start_index, |
| 1080 intptr_t end_index, | 1036 intptr_t end_index, |
| 1081 intptr_t min_char, | 1037 intptr_t min_char, |
| 1082 BlockLabel* fall_through, | 1038 BlockLabel* fall_through, |
| 1083 BlockLabel* even_label, | 1039 BlockLabel* even_label, |
| 1084 BlockLabel* odd_label) { | 1040 BlockLabel* odd_label) { |
| (...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1124 // TODO(erikcorry): Cache these. | 1080 // TODO(erikcorry): Cache these. |
| 1125 const TypedData& ba = TypedData::ZoneHandle( | 1081 const TypedData& ba = TypedData::ZoneHandle( |
| 1126 masm->zone(), TypedData::New(kTypedDataUint8ArrayCid, kSize, Heap::kOld)); | 1082 masm->zone(), TypedData::New(kTypedDataUint8ArrayCid, kSize, Heap::kOld)); |
| 1127 for (intptr_t i = 0; i < kSize; i++) { | 1083 for (intptr_t i = 0; i < kSize; i++) { |
| 1128 ba.SetUint8(i, templ[i]); | 1084 ba.SetUint8(i, templ[i]); |
| 1129 } | 1085 } |
| 1130 masm->CheckBitInTable(ba, on_bit_set); | 1086 masm->CheckBitInTable(ba, on_bit_set); |
| 1131 if (on_bit_clear != fall_through) masm->GoTo(on_bit_clear); | 1087 if (on_bit_clear != fall_through) masm->GoTo(on_bit_clear); |
| 1132 } | 1088 } |
| 1133 | 1089 |
| 1134 | |
| 1135 static void CutOutRange(RegExpMacroAssembler* masm, | 1090 static void CutOutRange(RegExpMacroAssembler* masm, |
| 1136 ZoneGrowableArray<int>* ranges, | 1091 ZoneGrowableArray<int>* ranges, |
| 1137 intptr_t start_index, | 1092 intptr_t start_index, |
| 1138 intptr_t end_index, | 1093 intptr_t end_index, |
| 1139 intptr_t cut_index, | 1094 intptr_t cut_index, |
| 1140 BlockLabel* even_label, | 1095 BlockLabel* even_label, |
| 1141 BlockLabel* odd_label) { | 1096 BlockLabel* odd_label) { |
| 1142 bool odd = (((cut_index - start_index) & 1) == 1); | 1097 bool odd = (((cut_index - start_index) & 1) == 1); |
| 1143 BlockLabel* in_range_label = odd ? odd_label : even_label; | 1098 BlockLabel* in_range_label = odd ? odd_label : even_label; |
| 1144 BlockLabel dummy; | 1099 BlockLabel dummy; |
| 1145 EmitDoubleBoundaryTest(masm, ranges->At(cut_index), | 1100 EmitDoubleBoundaryTest(masm, ranges->At(cut_index), |
| 1146 ranges->At(cut_index + 1) - 1, &dummy, in_range_label, | 1101 ranges->At(cut_index + 1) - 1, &dummy, in_range_label, |
| 1147 &dummy); | 1102 &dummy); |
| 1148 ASSERT(!dummy.IsLinked()); | 1103 ASSERT(!dummy.IsLinked()); |
| 1149 // Cut out the single range by rewriting the array. This creates a new | 1104 // Cut out the single range by rewriting the array. This creates a new |
| 1150 // range that is a merger of the two ranges on either side of the one we | 1105 // range that is a merger of the two ranges on either side of the one we |
| 1151 // are cutting out. The oddity of the labels is preserved. | 1106 // are cutting out. The oddity of the labels is preserved. |
| 1152 for (intptr_t j = cut_index; j > start_index; j--) { | 1107 for (intptr_t j = cut_index; j > start_index; j--) { |
| 1153 (*ranges)[j] = ranges->At(j - 1); | 1108 (*ranges)[j] = ranges->At(j - 1); |
| 1154 } | 1109 } |
| 1155 for (intptr_t j = cut_index + 1; j < end_index; j++) { | 1110 for (intptr_t j = cut_index + 1; j < end_index; j++) { |
| 1156 (*ranges)[j] = ranges->At(j + 1); | 1111 (*ranges)[j] = ranges->At(j + 1); |
| 1157 } | 1112 } |
| 1158 } | 1113 } |
| 1159 | 1114 |
| 1160 | |
| 1161 // Unicode case. Split the search space into kSize spaces that are handled | 1115 // Unicode case. Split the search space into kSize spaces that are handled |
| 1162 // with recursion. | 1116 // with recursion. |
| 1163 static void SplitSearchSpace(ZoneGrowableArray<int>* ranges, | 1117 static void SplitSearchSpace(ZoneGrowableArray<int>* ranges, |
| 1164 intptr_t start_index, | 1118 intptr_t start_index, |
| 1165 intptr_t end_index, | 1119 intptr_t end_index, |
| 1166 intptr_t* new_start_index, | 1120 intptr_t* new_start_index, |
| 1167 intptr_t* new_end_index, | 1121 intptr_t* new_end_index, |
| 1168 intptr_t* border) { | 1122 intptr_t* border) { |
| 1169 static const intptr_t kSize = RegExpMacroAssembler::kTableSize; | 1123 static const intptr_t kSize = RegExpMacroAssembler::kTableSize; |
| 1170 static const intptr_t kMask = RegExpMacroAssembler::kTableMask; | 1124 static const intptr_t kMask = RegExpMacroAssembler::kTableMask; |
| (...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1216 if (ranges->At(*new_end_index) == *border) { | 1170 if (ranges->At(*new_end_index) == *border) { |
| 1217 (*new_end_index)--; | 1171 (*new_end_index)--; |
| 1218 } | 1172 } |
| 1219 if (*border >= ranges->At(end_index)) { | 1173 if (*border >= ranges->At(end_index)) { |
| 1220 *border = ranges->At(end_index); | 1174 *border = ranges->At(end_index); |
| 1221 *new_start_index = end_index; // Won't be used. | 1175 *new_start_index = end_index; // Won't be used. |
| 1222 *new_end_index = end_index - 1; | 1176 *new_end_index = end_index - 1; |
| 1223 } | 1177 } |
| 1224 } | 1178 } |
| 1225 | 1179 |
| 1226 | |
| 1227 // Gets a series of segment boundaries representing a character class. If the | 1180 // Gets a series of segment boundaries representing a character class. If the |
| 1228 // character is in the range between an even and an odd boundary (counting from | 1181 // character is in the range between an even and an odd boundary (counting from |
| 1229 // start_index) then go to even_label, otherwise go to odd_label. We already | 1182 // start_index) then go to even_label, otherwise go to odd_label. We already |
| 1230 // know that the character is in the range of min_char to max_char inclusive. | 1183 // know that the character is in the range of min_char to max_char inclusive. |
| 1231 // Either label can be NULL indicating backtracking. Either label can also be | 1184 // Either label can be NULL indicating backtracking. Either label can also be |
| 1232 // equal to the fall_through label. | 1185 // equal to the fall_through label. |
| 1233 static void GenerateBranches(RegExpMacroAssembler* masm, | 1186 static void GenerateBranches(RegExpMacroAssembler* masm, |
| 1234 ZoneGrowableArray<int>* ranges, | 1187 ZoneGrowableArray<int>* ranges, |
| 1235 intptr_t start_index, | 1188 intptr_t start_index, |
| 1236 intptr_t end_index, | 1189 intptr_t end_index, |
| (...skipping 100 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1337 | 1290 |
| 1338 if (handle_rest.IsLinked()) { | 1291 if (handle_rest.IsLinked()) { |
| 1339 masm->BindBlock(&handle_rest); | 1292 masm->BindBlock(&handle_rest); |
| 1340 bool flip = (new_start_index & 1) != (start_index & 1); | 1293 bool flip = (new_start_index & 1) != (start_index & 1); |
| 1341 GenerateBranches(masm, ranges, new_start_index, end_index, border, max_char, | 1294 GenerateBranches(masm, ranges, new_start_index, end_index, border, max_char, |
| 1342 &dummy, flip ? odd_label : even_label, | 1295 &dummy, flip ? odd_label : even_label, |
| 1343 flip ? even_label : odd_label); | 1296 flip ? even_label : odd_label); |
| 1344 } | 1297 } |
| 1345 } | 1298 } |
| 1346 | 1299 |
| 1347 | |
| 1348 static void EmitCharClass(RegExpMacroAssembler* macro_assembler, | 1300 static void EmitCharClass(RegExpMacroAssembler* macro_assembler, |
| 1349 RegExpCharacterClass* cc, | 1301 RegExpCharacterClass* cc, |
| 1350 bool one_byte, | 1302 bool one_byte, |
| 1351 BlockLabel* on_failure, | 1303 BlockLabel* on_failure, |
| 1352 intptr_t cp_offset, | 1304 intptr_t cp_offset, |
| 1353 bool check_offset, | 1305 bool check_offset, |
| 1354 bool preloaded, | 1306 bool preloaded, |
| 1355 Zone* zone) { | 1307 Zone* zone) { |
| 1356 ZoneGrowableArray<CharacterRange>* ranges = cc->ranges(); | 1308 ZoneGrowableArray<CharacterRange>* ranges = cc->ranges(); |
| 1357 if (!CharacterRange::IsCanonical(ranges)) { | 1309 if (!CharacterRange::IsCanonical(ranges)) { |
| (...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1403 if (check_offset) { | 1355 if (check_offset) { |
| 1404 macro_assembler->CheckPosition(cp_offset, on_failure); | 1356 macro_assembler->CheckPosition(cp_offset, on_failure); |
| 1405 } | 1357 } |
| 1406 return; | 1358 return; |
| 1407 } | 1359 } |
| 1408 | 1360 |
| 1409 if (!preloaded) { | 1361 if (!preloaded) { |
| 1410 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check_offset); | 1362 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check_offset); |
| 1411 } | 1363 } |
| 1412 | 1364 |
| 1413 if (cc->is_standard() && | 1365 if (cc->is_standard() && macro_assembler->CheckSpecialCharacterClass( |
| 1414 macro_assembler->CheckSpecialCharacterClass(cc->standard_type(), | 1366 cc->standard_type(), on_failure)) { |
| 1415 on_failure)) { | |
| 1416 return; | 1367 return; |
| 1417 } | 1368 } |
| 1418 | 1369 |
| 1419 | |
| 1420 // A new list with ascending entries. Each entry is a code unit | 1370 // A new list with ascending entries. Each entry is a code unit |
| 1421 // where there is a boundary between code units that are part of | 1371 // where there is a boundary between code units that are part of |
| 1422 // the class and code units that are not. Normally we insert an | 1372 // the class and code units that are not. Normally we insert an |
| 1423 // entry at zero which goes to the failure label, but if there | 1373 // entry at zero which goes to the failure label, but if there |
| 1424 // was already one there we fall through for success on that entry. | 1374 // was already one there we fall through for success on that entry. |
| 1425 // Subsequent entries have alternating meaning (success/failure). | 1375 // Subsequent entries have alternating meaning (success/failure). |
| 1426 ZoneGrowableArray<int>* range_boundaries = | 1376 ZoneGrowableArray<int>* range_boundaries = |
| 1427 new (zone) ZoneGrowableArray<int>(last_valid_range); | 1377 new (zone) ZoneGrowableArray<int>(last_valid_range); |
| 1428 | 1378 |
| 1429 bool zeroth_entry_is_failure = !cc->is_negated(); | 1379 bool zeroth_entry_is_failure = !cc->is_negated(); |
| (...skipping 17 matching lines...) Expand all Loading... |
| 1447 GenerateBranches(macro_assembler, range_boundaries, | 1397 GenerateBranches(macro_assembler, range_boundaries, |
| 1448 0, // start_index. | 1398 0, // start_index. |
| 1449 end_index, | 1399 end_index, |
| 1450 0, // min_char. | 1400 0, // min_char. |
| 1451 max_char, &fall_through, | 1401 max_char, &fall_through, |
| 1452 zeroth_entry_is_failure ? &fall_through : on_failure, | 1402 zeroth_entry_is_failure ? &fall_through : on_failure, |
| 1453 zeroth_entry_is_failure ? on_failure : &fall_through); | 1403 zeroth_entry_is_failure ? on_failure : &fall_through); |
| 1454 macro_assembler->BindBlock(&fall_through); | 1404 macro_assembler->BindBlock(&fall_through); |
| 1455 } | 1405 } |
| 1456 | 1406 |
| 1457 | |
| 1458 RegExpNode::~RegExpNode() {} | 1407 RegExpNode::~RegExpNode() {} |
| 1459 | 1408 |
| 1460 | |
| 1461 RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler, | 1409 RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler, |
| 1462 Trace* trace) { | 1410 Trace* trace) { |
| 1463 // If we are generating a greedy loop then don't stop and don't reuse code. | 1411 // If we are generating a greedy loop then don't stop and don't reuse code. |
| 1464 if (trace->stop_node() != NULL) { | 1412 if (trace->stop_node() != NULL) { |
| 1465 return CONTINUE; | 1413 return CONTINUE; |
| 1466 } | 1414 } |
| 1467 | 1415 |
| 1468 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); | 1416 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); |
| 1469 if (trace->is_trivial()) { | 1417 if (trace->is_trivial()) { |
| 1470 if (label_.IsBound()) { | 1418 if (label_.IsBound()) { |
| (...skipping 22 matching lines...) Expand all Loading... |
| 1493 return CONTINUE; | 1441 return CONTINUE; |
| 1494 } | 1442 } |
| 1495 | 1443 |
| 1496 // If we get here code has been generated for this node too many times or | 1444 // If we get here code has been generated for this node too many times or |
| 1497 // recursion is too deep. Time to switch to a generic version. The code for | 1445 // recursion is too deep. Time to switch to a generic version. The code for |
| 1498 // generic versions above can handle deep recursion properly. | 1446 // generic versions above can handle deep recursion properly. |
| 1499 trace->Flush(compiler, this); | 1447 trace->Flush(compiler, this); |
| 1500 return DONE; | 1448 return DONE; |
| 1501 } | 1449 } |
| 1502 | 1450 |
| 1503 | |
| 1504 intptr_t ActionNode::EatsAtLeast(intptr_t still_to_find, | 1451 intptr_t ActionNode::EatsAtLeast(intptr_t still_to_find, |
| 1505 intptr_t budget, | 1452 intptr_t budget, |
| 1506 bool not_at_start) { | 1453 bool not_at_start) { |
| 1507 if (budget <= 0) return 0; | 1454 if (budget <= 0) return 0; |
| 1508 if (action_type_ == POSITIVE_SUBMATCH_SUCCESS) return 0; // Rewinds input! | 1455 if (action_type_ == POSITIVE_SUBMATCH_SUCCESS) return 0; // Rewinds input! |
| 1509 return on_success()->EatsAtLeast(still_to_find, budget - 1, not_at_start); | 1456 return on_success()->EatsAtLeast(still_to_find, budget - 1, not_at_start); |
| 1510 } | 1457 } |
| 1511 | 1458 |
| 1512 | |
| 1513 void ActionNode::FillInBMInfo(intptr_t offset, | 1459 void ActionNode::FillInBMInfo(intptr_t offset, |
| 1514 intptr_t budget, | 1460 intptr_t budget, |
| 1515 BoyerMooreLookahead* bm, | 1461 BoyerMooreLookahead* bm, |
| 1516 bool not_at_start) { | 1462 bool not_at_start) { |
| 1517 if (action_type_ == BEGIN_SUBMATCH) { | 1463 if (action_type_ == BEGIN_SUBMATCH) { |
| 1518 bm->SetRest(offset); | 1464 bm->SetRest(offset); |
| 1519 } else if (action_type_ != POSITIVE_SUBMATCH_SUCCESS) { | 1465 } else if (action_type_ != POSITIVE_SUBMATCH_SUCCESS) { |
| 1520 on_success()->FillInBMInfo(offset, budget - 1, bm, not_at_start); | 1466 on_success()->FillInBMInfo(offset, budget - 1, bm, not_at_start); |
| 1521 } | 1467 } |
| 1522 SaveBMInfo(bm, not_at_start, offset); | 1468 SaveBMInfo(bm, not_at_start, offset); |
| 1523 } | 1469 } |
| 1524 | 1470 |
| 1525 | |
| 1526 intptr_t AssertionNode::EatsAtLeast(intptr_t still_to_find, | 1471 intptr_t AssertionNode::EatsAtLeast(intptr_t still_to_find, |
| 1527 intptr_t budget, | 1472 intptr_t budget, |
| 1528 bool not_at_start) { | 1473 bool not_at_start) { |
| 1529 if (budget <= 0) return 0; | 1474 if (budget <= 0) return 0; |
| 1530 // If we know we are not at the start and we are asked "how many characters | 1475 // If we know we are not at the start and we are asked "how many characters |
| 1531 // will you match if you succeed?" then we can answer anything since false | 1476 // will you match if you succeed?" then we can answer anything since false |
| 1532 // implies false. So lets just return the max answer (still_to_find) since | 1477 // implies false. So lets just return the max answer (still_to_find) since |
| 1533 // that won't prevent us from preloading a lot of characters for the other | 1478 // that won't prevent us from preloading a lot of characters for the other |
| 1534 // branches in the node graph. | 1479 // branches in the node graph. |
| 1535 if (assertion_type() == AT_START && not_at_start) return still_to_find; | 1480 if (assertion_type() == AT_START && not_at_start) return still_to_find; |
| 1536 return on_success()->EatsAtLeast(still_to_find, budget - 1, not_at_start); | 1481 return on_success()->EatsAtLeast(still_to_find, budget - 1, not_at_start); |
| 1537 } | 1482 } |
| 1538 | 1483 |
| 1539 | |
| 1540 void AssertionNode::FillInBMInfo(intptr_t offset, | 1484 void AssertionNode::FillInBMInfo(intptr_t offset, |
| 1541 intptr_t budget, | 1485 intptr_t budget, |
| 1542 BoyerMooreLookahead* bm, | 1486 BoyerMooreLookahead* bm, |
| 1543 bool not_at_start) { | 1487 bool not_at_start) { |
| 1544 // Match the behaviour of EatsAtLeast on this node. | 1488 // Match the behaviour of EatsAtLeast on this node. |
| 1545 if (assertion_type() == AT_START && not_at_start) return; | 1489 if (assertion_type() == AT_START && not_at_start) return; |
| 1546 on_success()->FillInBMInfo(offset, budget - 1, bm, not_at_start); | 1490 on_success()->FillInBMInfo(offset, budget - 1, bm, not_at_start); |
| 1547 SaveBMInfo(bm, not_at_start, offset); | 1491 SaveBMInfo(bm, not_at_start, offset); |
| 1548 } | 1492 } |
| 1549 | 1493 |
| 1550 | |
| 1551 intptr_t BackReferenceNode::EatsAtLeast(intptr_t still_to_find, | 1494 intptr_t BackReferenceNode::EatsAtLeast(intptr_t still_to_find, |
| 1552 intptr_t budget, | 1495 intptr_t budget, |
| 1553 bool not_at_start) { | 1496 bool not_at_start) { |
| 1554 if (budget <= 0) return 0; | 1497 if (budget <= 0) return 0; |
| 1555 return on_success()->EatsAtLeast(still_to_find, budget - 1, not_at_start); | 1498 return on_success()->EatsAtLeast(still_to_find, budget - 1, not_at_start); |
| 1556 } | 1499 } |
| 1557 | 1500 |
| 1558 | |
| 1559 intptr_t TextNode::EatsAtLeast(intptr_t still_to_find, | 1501 intptr_t TextNode::EatsAtLeast(intptr_t still_to_find, |
| 1560 intptr_t budget, | 1502 intptr_t budget, |
| 1561 bool not_at_start) { | 1503 bool not_at_start) { |
| 1562 intptr_t answer = Length(); | 1504 intptr_t answer = Length(); |
| 1563 if (answer >= still_to_find) return answer; | 1505 if (answer >= still_to_find) return answer; |
| 1564 if (budget <= 0) return answer; | 1506 if (budget <= 0) return answer; |
| 1565 // We are not at start after this node so we set the last argument to 'true'. | 1507 // We are not at start after this node so we set the last argument to 'true'. |
| 1566 return answer + | 1508 return answer + |
| 1567 on_success()->EatsAtLeast(still_to_find - answer, budget - 1, true); | 1509 on_success()->EatsAtLeast(still_to_find - answer, budget - 1, true); |
| 1568 } | 1510 } |
| 1569 | 1511 |
| 1570 | |
| 1571 intptr_t NegativeLookaheadChoiceNode::EatsAtLeast(intptr_t still_to_find, | 1512 intptr_t NegativeLookaheadChoiceNode::EatsAtLeast(intptr_t still_to_find, |
| 1572 intptr_t budget, | 1513 intptr_t budget, |
| 1573 bool not_at_start) { | 1514 bool not_at_start) { |
| 1574 if (budget <= 0) return 0; | 1515 if (budget <= 0) return 0; |
| 1575 // Alternative 0 is the negative lookahead, alternative 1 is what comes | 1516 // Alternative 0 is the negative lookahead, alternative 1 is what comes |
| 1576 // afterwards. | 1517 // afterwards. |
| 1577 RegExpNode* node = (*alternatives_)[1].node(); | 1518 RegExpNode* node = (*alternatives_)[1].node(); |
| 1578 return node->EatsAtLeast(still_to_find, budget - 1, not_at_start); | 1519 return node->EatsAtLeast(still_to_find, budget - 1, not_at_start); |
| 1579 } | 1520 } |
| 1580 | 1521 |
| 1581 | |
| 1582 void NegativeLookaheadChoiceNode::GetQuickCheckDetails( | 1522 void NegativeLookaheadChoiceNode::GetQuickCheckDetails( |
| 1583 QuickCheckDetails* details, | 1523 QuickCheckDetails* details, |
| 1584 RegExpCompiler* compiler, | 1524 RegExpCompiler* compiler, |
| 1585 intptr_t filled_in, | 1525 intptr_t filled_in, |
| 1586 bool not_at_start) { | 1526 bool not_at_start) { |
| 1587 // Alternative 0 is the negative lookahead, alternative 1 is what comes | 1527 // Alternative 0 is the negative lookahead, alternative 1 is what comes |
| 1588 // afterwards. | 1528 // afterwards. |
| 1589 RegExpNode* node = (*alternatives_)[1].node(); | 1529 RegExpNode* node = (*alternatives_)[1].node(); |
| 1590 return node->GetQuickCheckDetails(details, compiler, filled_in, not_at_start); | 1530 return node->GetQuickCheckDetails(details, compiler, filled_in, not_at_start); |
| 1591 } | 1531 } |
| 1592 | 1532 |
| 1593 | |
| 1594 intptr_t ChoiceNode::EatsAtLeastHelper(intptr_t still_to_find, | 1533 intptr_t ChoiceNode::EatsAtLeastHelper(intptr_t still_to_find, |
| 1595 intptr_t budget, | 1534 intptr_t budget, |
| 1596 RegExpNode* ignore_this_node, | 1535 RegExpNode* ignore_this_node, |
| 1597 bool not_at_start) { | 1536 bool not_at_start) { |
| 1598 if (budget <= 0) return 0; | 1537 if (budget <= 0) return 0; |
| 1599 intptr_t min = 100; | 1538 intptr_t min = 100; |
| 1600 intptr_t choice_count = alternatives_->length(); | 1539 intptr_t choice_count = alternatives_->length(); |
| 1601 budget = (budget - 1) / choice_count; | 1540 budget = (budget - 1) / choice_count; |
| 1602 for (intptr_t i = 0; i < choice_count; i++) { | 1541 for (intptr_t i = 0; i < choice_count; i++) { |
| 1603 RegExpNode* node = (*alternatives_)[i].node(); | 1542 RegExpNode* node = (*alternatives_)[i].node(); |
| 1604 if (node == ignore_this_node) continue; | 1543 if (node == ignore_this_node) continue; |
| 1605 intptr_t node_eats_at_least = | 1544 intptr_t node_eats_at_least = |
| 1606 node->EatsAtLeast(still_to_find, budget, not_at_start); | 1545 node->EatsAtLeast(still_to_find, budget, not_at_start); |
| 1607 if (node_eats_at_least < min) min = node_eats_at_least; | 1546 if (node_eats_at_least < min) min = node_eats_at_least; |
| 1608 if (min == 0) return 0; | 1547 if (min == 0) return 0; |
| 1609 } | 1548 } |
| 1610 return min; | 1549 return min; |
| 1611 } | 1550 } |
| 1612 | 1551 |
| 1613 | |
| 1614 intptr_t LoopChoiceNode::EatsAtLeast(intptr_t still_to_find, | 1552 intptr_t LoopChoiceNode::EatsAtLeast(intptr_t still_to_find, |
| 1615 intptr_t budget, | 1553 intptr_t budget, |
| 1616 bool not_at_start) { | 1554 bool not_at_start) { |
| 1617 return EatsAtLeastHelper(still_to_find, budget - 1, loop_node_, not_at_start); | 1555 return EatsAtLeastHelper(still_to_find, budget - 1, loop_node_, not_at_start); |
| 1618 } | 1556 } |
| 1619 | 1557 |
| 1620 | |
| 1621 intptr_t ChoiceNode::EatsAtLeast(intptr_t still_to_find, | 1558 intptr_t ChoiceNode::EatsAtLeast(intptr_t still_to_find, |
| 1622 intptr_t budget, | 1559 intptr_t budget, |
| 1623 bool not_at_start) { | 1560 bool not_at_start) { |
| 1624 return EatsAtLeastHelper(still_to_find, budget, NULL, not_at_start); | 1561 return EatsAtLeastHelper(still_to_find, budget, NULL, not_at_start); |
| 1625 } | 1562 } |
| 1626 | 1563 |
| 1627 | |
| 1628 // Takes the left-most 1-bit and smears it out, setting all bits to its right. | 1564 // Takes the left-most 1-bit and smears it out, setting all bits to its right. |
| 1629 static inline uint32_t SmearBitsRight(uint32_t v) { | 1565 static inline uint32_t SmearBitsRight(uint32_t v) { |
| 1630 v |= v >> 1; | 1566 v |= v >> 1; |
| 1631 v |= v >> 2; | 1567 v |= v >> 2; |
| 1632 v |= v >> 4; | 1568 v |= v >> 4; |
| 1633 v |= v >> 8; | 1569 v |= v >> 8; |
| 1634 v |= v >> 16; | 1570 v |= v >> 16; |
| 1635 return v; | 1571 return v; |
| 1636 } | 1572 } |
| 1637 | 1573 |
| 1638 | |
| 1639 bool QuickCheckDetails::Rationalize(bool asc) { | 1574 bool QuickCheckDetails::Rationalize(bool asc) { |
| 1640 bool found_useful_op = false; | 1575 bool found_useful_op = false; |
| 1641 uint32_t char_mask; | 1576 uint32_t char_mask; |
| 1642 if (asc) { | 1577 if (asc) { |
| 1643 char_mask = Symbols::kMaxOneCharCodeSymbol; | 1578 char_mask = Symbols::kMaxOneCharCodeSymbol; |
| 1644 } else { | 1579 } else { |
| 1645 char_mask = Utf16::kMaxCodeUnit; | 1580 char_mask = Utf16::kMaxCodeUnit; |
| 1646 } | 1581 } |
| 1647 mask_ = 0; | 1582 mask_ = 0; |
| 1648 value_ = 0; | 1583 value_ = 0; |
| 1649 intptr_t char_shift = 0; | 1584 intptr_t char_shift = 0; |
| 1650 for (intptr_t i = 0; i < characters_; i++) { | 1585 for (intptr_t i = 0; i < characters_; i++) { |
| 1651 Position* pos = &positions_[i]; | 1586 Position* pos = &positions_[i]; |
| 1652 if ((pos->mask & Symbols::kMaxOneCharCodeSymbol) != 0) { | 1587 if ((pos->mask & Symbols::kMaxOneCharCodeSymbol) != 0) { |
| 1653 found_useful_op = true; | 1588 found_useful_op = true; |
| 1654 } | 1589 } |
| 1655 mask_ |= (pos->mask & char_mask) << char_shift; | 1590 mask_ |= (pos->mask & char_mask) << char_shift; |
| 1656 value_ |= (pos->value & char_mask) << char_shift; | 1591 value_ |= (pos->value & char_mask) << char_shift; |
| 1657 char_shift += asc ? 8 : 16; | 1592 char_shift += asc ? 8 : 16; |
| 1658 } | 1593 } |
| 1659 return found_useful_op; | 1594 return found_useful_op; |
| 1660 } | 1595 } |
| 1661 | 1596 |
| 1662 | |
| 1663 bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler, | 1597 bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler, |
| 1664 Trace* bounds_check_trace, | 1598 Trace* bounds_check_trace, |
| 1665 Trace* trace, | 1599 Trace* trace, |
| 1666 bool preload_has_checked_bounds, | 1600 bool preload_has_checked_bounds, |
| 1667 BlockLabel* on_possible_success, | 1601 BlockLabel* on_possible_success, |
| 1668 QuickCheckDetails* details, | 1602 QuickCheckDetails* details, |
| 1669 bool fall_through_on_failure) { | 1603 bool fall_through_on_failure) { |
| 1670 if (details->characters() == 0) return false; | 1604 if (details->characters() == 0) return false; |
| 1671 GetQuickCheckDetails(details, compiler, 0, | 1605 GetQuickCheckDetails(details, compiler, 0, |
| 1672 trace->at_start() == Trace::FALSE_VALUE); | 1606 trace->at_start() == Trace::FALSE_VALUE); |
| (...skipping 10 matching lines...) Expand all Loading... |
| 1683 ASSERT(trace->cp_offset() == bounds_check_trace->cp_offset()); | 1617 ASSERT(trace->cp_offset() == bounds_check_trace->cp_offset()); |
| 1684 // We are attempting to preload the minimum number of characters | 1618 // We are attempting to preload the minimum number of characters |
| 1685 // any choice would eat, so if the bounds check fails, then none of the | 1619 // any choice would eat, so if the bounds check fails, then none of the |
| 1686 // choices can succeed, so we can just immediately backtrack, rather | 1620 // choices can succeed, so we can just immediately backtrack, rather |
| 1687 // than go to the next choice. | 1621 // than go to the next choice. |
| 1688 assembler->LoadCurrentCharacter( | 1622 assembler->LoadCurrentCharacter( |
| 1689 trace->cp_offset(), bounds_check_trace->backtrack(), | 1623 trace->cp_offset(), bounds_check_trace->backtrack(), |
| 1690 !preload_has_checked_bounds, details->characters()); | 1624 !preload_has_checked_bounds, details->characters()); |
| 1691 } | 1625 } |
| 1692 | 1626 |
| 1693 | |
| 1694 bool need_mask = true; | 1627 bool need_mask = true; |
| 1695 | 1628 |
| 1696 if (details->characters() == 1) { | 1629 if (details->characters() == 1) { |
| 1697 // If number of characters preloaded is 1 then we used a byte or 16 bit | 1630 // If number of characters preloaded is 1 then we used a byte or 16 bit |
| 1698 // load so the value is already masked down. | 1631 // load so the value is already masked down. |
| 1699 uint32_t char_mask; | 1632 uint32_t char_mask; |
| 1700 if (compiler->one_byte()) { | 1633 if (compiler->one_byte()) { |
| 1701 char_mask = Symbols::kMaxOneCharCodeSymbol; | 1634 char_mask = Symbols::kMaxOneCharCodeSymbol; |
| 1702 } else { | 1635 } else { |
| 1703 char_mask = Utf16::kMaxCodeUnit; | 1636 char_mask = Utf16::kMaxCodeUnit; |
| (...skipping 21 matching lines...) Expand all Loading... |
| 1725 } else { | 1658 } else { |
| 1726 if (need_mask) { | 1659 if (need_mask) { |
| 1727 assembler->CheckNotCharacterAfterAnd(value, mask, trace->backtrack()); | 1660 assembler->CheckNotCharacterAfterAnd(value, mask, trace->backtrack()); |
| 1728 } else { | 1661 } else { |
| 1729 assembler->CheckNotCharacter(value, trace->backtrack()); | 1662 assembler->CheckNotCharacter(value, trace->backtrack()); |
| 1730 } | 1663 } |
| 1731 } | 1664 } |
| 1732 return true; | 1665 return true; |
| 1733 } | 1666 } |
| 1734 | 1667 |
| 1735 | |
| 1736 // Here is the meat of GetQuickCheckDetails (see also the comment on the | 1668 // Here is the meat of GetQuickCheckDetails (see also the comment on the |
| 1737 // super-class in the .h file). | 1669 // super-class in the .h file). |
| 1738 // | 1670 // |
| 1739 // We iterate along the text object, building up for each character a | 1671 // We iterate along the text object, building up for each character a |
| 1740 // mask and value that can be used to test for a quick failure to match. | 1672 // mask and value that can be used to test for a quick failure to match. |
| 1741 // The masks and values for the positions will be combined into a single | 1673 // The masks and values for the positions will be combined into a single |
| 1742 // machine word for the current character width in order to be used in | 1674 // machine word for the current character width in order to be used in |
| 1743 // generating a quick check. | 1675 // generating a quick check. |
| 1744 void TextNode::GetQuickCheckDetails(QuickCheckDetails* details, | 1676 void TextNode::GetQuickCheckDetails(QuickCheckDetails* details, |
| 1745 RegExpCompiler* compiler, | 1677 RegExpCompiler* compiler, |
| (...skipping 143 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1889 } | 1821 } |
| 1890 } | 1822 } |
| 1891 } | 1823 } |
| 1892 ASSERT(characters_filled_in != details->characters()); | 1824 ASSERT(characters_filled_in != details->characters()); |
| 1893 if (!details->cannot_match()) { | 1825 if (!details->cannot_match()) { |
| 1894 on_success()->GetQuickCheckDetails(details, compiler, characters_filled_in, | 1826 on_success()->GetQuickCheckDetails(details, compiler, characters_filled_in, |
| 1895 true); | 1827 true); |
| 1896 } | 1828 } |
| 1897 } | 1829 } |
| 1898 | 1830 |
| 1899 | |
| 1900 void QuickCheckDetails::Clear() { | 1831 void QuickCheckDetails::Clear() { |
| 1901 for (int i = 0; i < characters_; i++) { | 1832 for (int i = 0; i < characters_; i++) { |
| 1902 positions_[i].mask = 0; | 1833 positions_[i].mask = 0; |
| 1903 positions_[i].value = 0; | 1834 positions_[i].value = 0; |
| 1904 positions_[i].determines_perfectly = false; | 1835 positions_[i].determines_perfectly = false; |
| 1905 } | 1836 } |
| 1906 characters_ = 0; | 1837 characters_ = 0; |
| 1907 } | 1838 } |
| 1908 | 1839 |
| 1909 | |
| 1910 void QuickCheckDetails::Advance(intptr_t by, bool one_byte) { | 1840 void QuickCheckDetails::Advance(intptr_t by, bool one_byte) { |
| 1911 ASSERT(by >= 0); | 1841 ASSERT(by >= 0); |
| 1912 if (by >= characters_) { | 1842 if (by >= characters_) { |
| 1913 Clear(); | 1843 Clear(); |
| 1914 return; | 1844 return; |
| 1915 } | 1845 } |
| 1916 for (intptr_t i = 0; i < characters_ - by; i++) { | 1846 for (intptr_t i = 0; i < characters_ - by; i++) { |
| 1917 positions_[i] = positions_[by + i]; | 1847 positions_[i] = positions_[by + i]; |
| 1918 } | 1848 } |
| 1919 for (intptr_t i = characters_ - by; i < characters_; i++) { | 1849 for (intptr_t i = characters_ - by; i < characters_; i++) { |
| 1920 positions_[i].mask = 0; | 1850 positions_[i].mask = 0; |
| 1921 positions_[i].value = 0; | 1851 positions_[i].value = 0; |
| 1922 positions_[i].determines_perfectly = false; | 1852 positions_[i].determines_perfectly = false; |
| 1923 } | 1853 } |
| 1924 characters_ -= by; | 1854 characters_ -= by; |
| 1925 // We could change mask_ and value_ here but we would never advance unless | 1855 // We could change mask_ and value_ here but we would never advance unless |
| 1926 // they had already been used in a check and they won't be used again because | 1856 // they had already been used in a check and they won't be used again because |
| 1927 // it would gain us nothing. So there's no point. | 1857 // it would gain us nothing. So there's no point. |
| 1928 } | 1858 } |
| 1929 | 1859 |
| 1930 | |
| 1931 void QuickCheckDetails::Merge(QuickCheckDetails* other, intptr_t from_index) { | 1860 void QuickCheckDetails::Merge(QuickCheckDetails* other, intptr_t from_index) { |
| 1932 ASSERT(characters_ == other->characters_); | 1861 ASSERT(characters_ == other->characters_); |
| 1933 if (other->cannot_match_) { | 1862 if (other->cannot_match_) { |
| 1934 return; | 1863 return; |
| 1935 } | 1864 } |
| 1936 if (cannot_match_) { | 1865 if (cannot_match_) { |
| 1937 *this = *other; | 1866 *this = *other; |
| 1938 return; | 1867 return; |
| 1939 } | 1868 } |
| 1940 for (intptr_t i = from_index; i < characters_; i++) { | 1869 for (intptr_t i = from_index; i < characters_; i++) { |
| 1941 QuickCheckDetails::Position* pos = positions(i); | 1870 QuickCheckDetails::Position* pos = positions(i); |
| 1942 QuickCheckDetails::Position* other_pos = other->positions(i); | 1871 QuickCheckDetails::Position* other_pos = other->positions(i); |
| 1943 if (pos->mask != other_pos->mask || pos->value != other_pos->value || | 1872 if (pos->mask != other_pos->mask || pos->value != other_pos->value || |
| 1944 !other_pos->determines_perfectly) { | 1873 !other_pos->determines_perfectly) { |
| 1945 // Our mask-compare operation will be approximate unless we have the | 1874 // Our mask-compare operation will be approximate unless we have the |
| 1946 // exact same operation on both sides of the alternation. | 1875 // exact same operation on both sides of the alternation. |
| 1947 pos->determines_perfectly = false; | 1876 pos->determines_perfectly = false; |
| 1948 } | 1877 } |
| 1949 pos->mask &= other_pos->mask; | 1878 pos->mask &= other_pos->mask; |
| 1950 pos->value &= pos->mask; | 1879 pos->value &= pos->mask; |
| 1951 other_pos->value &= pos->mask; | 1880 other_pos->value &= pos->mask; |
| 1952 uint16_t differing_bits = (pos->value ^ other_pos->value); | 1881 uint16_t differing_bits = (pos->value ^ other_pos->value); |
| 1953 pos->mask &= ~differing_bits; | 1882 pos->mask &= ~differing_bits; |
| 1954 pos->value &= pos->mask; | 1883 pos->value &= pos->mask; |
| 1955 } | 1884 } |
| 1956 } | 1885 } |
| 1957 | 1886 |
| 1958 | |
| 1959 class VisitMarker : public ValueObject { | 1887 class VisitMarker : public ValueObject { |
| 1960 public: | 1888 public: |
| 1961 explicit VisitMarker(NodeInfo* info) : info_(info) { | 1889 explicit VisitMarker(NodeInfo* info) : info_(info) { |
| 1962 ASSERT(!info->visited); | 1890 ASSERT(!info->visited); |
| 1963 info->visited = true; | 1891 info->visited = true; |
| 1964 } | 1892 } |
| 1965 ~VisitMarker() { info_->visited = false; } | 1893 ~VisitMarker() { info_->visited = false; } |
| 1966 | 1894 |
| 1967 private: | 1895 private: |
| 1968 NodeInfo* info_; | 1896 NodeInfo* info_; |
| 1969 }; | 1897 }; |
| 1970 | 1898 |
| 1971 | |
| 1972 RegExpNode* SeqRegExpNode::FilterOneByte(intptr_t depth, bool ignore_case) { | 1899 RegExpNode* SeqRegExpNode::FilterOneByte(intptr_t depth, bool ignore_case) { |
| 1973 if (info()->replacement_calculated) return replacement(); | 1900 if (info()->replacement_calculated) return replacement(); |
| 1974 if (depth < 0) return this; | 1901 if (depth < 0) return this; |
| 1975 ASSERT(!info()->visited); | 1902 ASSERT(!info()->visited); |
| 1976 VisitMarker marker(info()); | 1903 VisitMarker marker(info()); |
| 1977 return FilterSuccessor(depth - 1, ignore_case); | 1904 return FilterSuccessor(depth - 1, ignore_case); |
| 1978 } | 1905 } |
| 1979 | 1906 |
| 1980 | |
| 1981 RegExpNode* SeqRegExpNode::FilterSuccessor(intptr_t depth, bool ignore_case) { | 1907 RegExpNode* SeqRegExpNode::FilterSuccessor(intptr_t depth, bool ignore_case) { |
| 1982 RegExpNode* next = on_success_->FilterOneByte(depth - 1, ignore_case); | 1908 RegExpNode* next = on_success_->FilterOneByte(depth - 1, ignore_case); |
| 1983 if (next == NULL) return set_replacement(NULL); | 1909 if (next == NULL) return set_replacement(NULL); |
| 1984 on_success_ = next; | 1910 on_success_ = next; |
| 1985 return set_replacement(this); | 1911 return set_replacement(this); |
| 1986 } | 1912 } |
| 1987 | 1913 |
| 1988 | |
| 1989 // We need to check for the following characters: 0x39c 0x3bc 0x178. | 1914 // We need to check for the following characters: 0x39c 0x3bc 0x178. |
| 1990 static inline bool RangeContainsLatin1Equivalents(CharacterRange range) { | 1915 static inline bool RangeContainsLatin1Equivalents(CharacterRange range) { |
| 1991 // TODO(dcarney): this could be a lot more efficient. | 1916 // TODO(dcarney): this could be a lot more efficient. |
| 1992 return range.Contains(0x39c) || range.Contains(0x3bc) || | 1917 return range.Contains(0x39c) || range.Contains(0x3bc) || |
| 1993 range.Contains(0x178); | 1918 range.Contains(0x178); |
| 1994 } | 1919 } |
| 1995 | 1920 |
| 1996 | |
| 1997 static bool RangesContainLatin1Equivalents( | 1921 static bool RangesContainLatin1Equivalents( |
| 1998 ZoneGrowableArray<CharacterRange>* ranges) { | 1922 ZoneGrowableArray<CharacterRange>* ranges) { |
| 1999 for (intptr_t i = 0; i < ranges->length(); i++) { | 1923 for (intptr_t i = 0; i < ranges->length(); i++) { |
| 2000 // TODO(dcarney): this could be a lot more efficient. | 1924 // TODO(dcarney): this could be a lot more efficient. |
| 2001 if (RangeContainsLatin1Equivalents(ranges->At(i))) return true; | 1925 if (RangeContainsLatin1Equivalents(ranges->At(i))) return true; |
| 2002 } | 1926 } |
| 2003 return false; | 1927 return false; |
| 2004 } | 1928 } |
| 2005 | 1929 |
| 2006 | |
| 2007 static uint16_t ConvertNonLatin1ToLatin1(uint16_t c) { | 1930 static uint16_t ConvertNonLatin1ToLatin1(uint16_t c) { |
| 2008 ASSERT(c > Symbols::kMaxOneCharCodeSymbol); | 1931 ASSERT(c > Symbols::kMaxOneCharCodeSymbol); |
| 2009 switch (c) { | 1932 switch (c) { |
| 2010 // This are equivalent characters in unicode. | 1933 // This are equivalent characters in unicode. |
| 2011 case 0x39c: | 1934 case 0x39c: |
| 2012 case 0x3bc: | 1935 case 0x3bc: |
| 2013 return 0xb5; | 1936 return 0xb5; |
| 2014 // This is an uppercase of a Latin-1 character | 1937 // This is an uppercase of a Latin-1 character |
| 2015 // outside of Latin-1. | 1938 // outside of Latin-1. |
| 2016 case 0x178: | 1939 case 0x178: |
| 2017 return 0xff; | 1940 return 0xff; |
| 2018 } | 1941 } |
| 2019 return 0; | 1942 return 0; |
| 2020 } | 1943 } |
| 2021 | 1944 |
| 2022 | |
| 2023 RegExpNode* TextNode::FilterOneByte(intptr_t depth, bool ignore_case) { | 1945 RegExpNode* TextNode::FilterOneByte(intptr_t depth, bool ignore_case) { |
| 2024 if (info()->replacement_calculated) return replacement(); | 1946 if (info()->replacement_calculated) return replacement(); |
| 2025 if (depth < 0) return this; | 1947 if (depth < 0) return this; |
| 2026 ASSERT(!info()->visited); | 1948 ASSERT(!info()->visited); |
| 2027 VisitMarker marker(info()); | 1949 VisitMarker marker(info()); |
| 2028 intptr_t element_count = elms_->length(); | 1950 intptr_t element_count = elms_->length(); |
| 2029 for (intptr_t i = 0; i < element_count; i++) { | 1951 for (intptr_t i = 0; i < element_count; i++) { |
| 2030 TextElement elm = elms_->At(i); | 1952 TextElement elm = elms_->At(i); |
| 2031 if (elm.text_type() == TextElement::ATOM) { | 1953 if (elm.text_type() == TextElement::ATOM) { |
| 2032 ZoneGrowableArray<uint16_t>* quarks = elm.atom()->data(); | 1954 ZoneGrowableArray<uint16_t>* quarks = elm.atom()->data(); |
| (...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2064 // This will be handled in a later filter. | 1986 // This will be handled in a later filter. |
| 2065 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue; | 1987 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue; |
| 2066 return set_replacement(NULL); | 1988 return set_replacement(NULL); |
| 2067 } | 1989 } |
| 2068 } | 1990 } |
| 2069 } | 1991 } |
| 2070 } | 1992 } |
| 2071 return FilterSuccessor(depth - 1, ignore_case); | 1993 return FilterSuccessor(depth - 1, ignore_case); |
| 2072 } | 1994 } |
| 2073 | 1995 |
| 2074 | |
| 2075 RegExpNode* LoopChoiceNode::FilterOneByte(intptr_t depth, bool ignore_case) { | 1996 RegExpNode* LoopChoiceNode::FilterOneByte(intptr_t depth, bool ignore_case) { |
| 2076 if (info()->replacement_calculated) return replacement(); | 1997 if (info()->replacement_calculated) return replacement(); |
| 2077 if (depth < 0) return this; | 1998 if (depth < 0) return this; |
| 2078 if (info()->visited) return this; | 1999 if (info()->visited) return this; |
| 2079 { | 2000 { |
| 2080 VisitMarker marker(info()); | 2001 VisitMarker marker(info()); |
| 2081 | 2002 |
| 2082 RegExpNode* continue_replacement = | 2003 RegExpNode* continue_replacement = |
| 2083 continue_node_->FilterOneByte(depth - 1, ignore_case); | 2004 continue_node_->FilterOneByte(depth - 1, ignore_case); |
| 2084 // If we can't continue after the loop then there is no sense in doing the | 2005 // If we can't continue after the loop then there is no sense in doing the |
| 2085 // loop. | 2006 // loop. |
| 2086 if (continue_replacement == NULL) return set_replacement(NULL); | 2007 if (continue_replacement == NULL) return set_replacement(NULL); |
| 2087 } | 2008 } |
| 2088 | 2009 |
| 2089 return ChoiceNode::FilterOneByte(depth - 1, ignore_case); | 2010 return ChoiceNode::FilterOneByte(depth - 1, ignore_case); |
| 2090 } | 2011 } |
| 2091 | 2012 |
| 2092 | |
| 2093 RegExpNode* ChoiceNode::FilterOneByte(intptr_t depth, bool ignore_case) { | 2013 RegExpNode* ChoiceNode::FilterOneByte(intptr_t depth, bool ignore_case) { |
| 2094 if (info()->replacement_calculated) return replacement(); | 2014 if (info()->replacement_calculated) return replacement(); |
| 2095 if (depth < 0) return this; | 2015 if (depth < 0) return this; |
| 2096 if (info()->visited) return this; | 2016 if (info()->visited) return this; |
| 2097 VisitMarker marker(info()); | 2017 VisitMarker marker(info()); |
| 2098 intptr_t choice_count = alternatives_->length(); | 2018 intptr_t choice_count = alternatives_->length(); |
| 2099 | 2019 |
| 2100 for (intptr_t i = 0; i < choice_count; i++) { | 2020 for (intptr_t i = 0; i < choice_count; i++) { |
| 2101 GuardedAlternative alternative = alternatives_->At(i); | 2021 GuardedAlternative alternative = alternatives_->At(i); |
| 2102 if (alternative.guards() != NULL && alternative.guards()->length() != 0) { | 2022 if (alternative.guards() != NULL && alternative.guards()->length() != 0) { |
| (...skipping 30 matching lines...) Expand all Loading... |
| 2133 (*alternatives_)[i].node()->FilterOneByte(depth - 1, ignore_case); | 2053 (*alternatives_)[i].node()->FilterOneByte(depth - 1, ignore_case); |
| 2134 if (replacement != NULL) { | 2054 if (replacement != NULL) { |
| 2135 (*alternatives_)[i].set_node(replacement); | 2055 (*alternatives_)[i].set_node(replacement); |
| 2136 new_alternatives->Add((*alternatives_)[i]); | 2056 new_alternatives->Add((*alternatives_)[i]); |
| 2137 } | 2057 } |
| 2138 } | 2058 } |
| 2139 alternatives_ = new_alternatives; | 2059 alternatives_ = new_alternatives; |
| 2140 return this; | 2060 return this; |
| 2141 } | 2061 } |
| 2142 | 2062 |
| 2143 | |
| 2144 RegExpNode* NegativeLookaheadChoiceNode::FilterOneByte(intptr_t depth, | 2063 RegExpNode* NegativeLookaheadChoiceNode::FilterOneByte(intptr_t depth, |
| 2145 bool ignore_case) { | 2064 bool ignore_case) { |
| 2146 if (info()->replacement_calculated) return replacement(); | 2065 if (info()->replacement_calculated) return replacement(); |
| 2147 if (depth < 0) return this; | 2066 if (depth < 0) return this; |
| 2148 if (info()->visited) return this; | 2067 if (info()->visited) return this; |
| 2149 VisitMarker marker(info()); | 2068 VisitMarker marker(info()); |
| 2150 // Alternative 0 is the negative lookahead, alternative 1 is what comes | 2069 // Alternative 0 is the negative lookahead, alternative 1 is what comes |
| 2151 // afterwards. | 2070 // afterwards. |
| 2152 RegExpNode* node = (*alternatives_)[1].node(); | 2071 RegExpNode* node = (*alternatives_)[1].node(); |
| 2153 RegExpNode* replacement = node->FilterOneByte(depth - 1, ignore_case); | 2072 RegExpNode* replacement = node->FilterOneByte(depth - 1, ignore_case); |
| 2154 if (replacement == NULL) return set_replacement(NULL); | 2073 if (replacement == NULL) return set_replacement(NULL); |
| 2155 (*alternatives_)[1].set_node(replacement); | 2074 (*alternatives_)[1].set_node(replacement); |
| 2156 | 2075 |
| 2157 RegExpNode* neg_node = (*alternatives_)[0].node(); | 2076 RegExpNode* neg_node = (*alternatives_)[0].node(); |
| 2158 RegExpNode* neg_replacement = neg_node->FilterOneByte(depth - 1, ignore_case); | 2077 RegExpNode* neg_replacement = neg_node->FilterOneByte(depth - 1, ignore_case); |
| 2159 // If the negative lookahead is always going to fail then | 2078 // If the negative lookahead is always going to fail then |
| 2160 // we don't need to check it. | 2079 // we don't need to check it. |
| 2161 if (neg_replacement == NULL) return set_replacement(replacement); | 2080 if (neg_replacement == NULL) return set_replacement(replacement); |
| 2162 (*alternatives_)[0].set_node(neg_replacement); | 2081 (*alternatives_)[0].set_node(neg_replacement); |
| 2163 return set_replacement(this); | 2082 return set_replacement(this); |
| 2164 } | 2083 } |
| 2165 | 2084 |
| 2166 | |
| 2167 void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, | 2085 void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, |
| 2168 RegExpCompiler* compiler, | 2086 RegExpCompiler* compiler, |
| 2169 intptr_t characters_filled_in, | 2087 intptr_t characters_filled_in, |
| 2170 bool not_at_start) { | 2088 bool not_at_start) { |
| 2171 if (body_can_be_zero_length_ || info()->visited) return; | 2089 if (body_can_be_zero_length_ || info()->visited) return; |
| 2172 VisitMarker marker(info()); | 2090 VisitMarker marker(info()); |
| 2173 return ChoiceNode::GetQuickCheckDetails(details, compiler, | 2091 return ChoiceNode::GetQuickCheckDetails(details, compiler, |
| 2174 characters_filled_in, not_at_start); | 2092 characters_filled_in, not_at_start); |
| 2175 } | 2093 } |
| 2176 | 2094 |
| 2177 | |
| 2178 void LoopChoiceNode::FillInBMInfo(intptr_t offset, | 2095 void LoopChoiceNode::FillInBMInfo(intptr_t offset, |
| 2179 intptr_t budget, | 2096 intptr_t budget, |
| 2180 BoyerMooreLookahead* bm, | 2097 BoyerMooreLookahead* bm, |
| 2181 bool not_at_start) { | 2098 bool not_at_start) { |
| 2182 if (body_can_be_zero_length_ || budget <= 0) { | 2099 if (body_can_be_zero_length_ || budget <= 0) { |
| 2183 bm->SetRest(offset); | 2100 bm->SetRest(offset); |
| 2184 SaveBMInfo(bm, not_at_start, offset); | 2101 SaveBMInfo(bm, not_at_start, offset); |
| 2185 return; | 2102 return; |
| 2186 } | 2103 } |
| 2187 ChoiceNode::FillInBMInfo(offset, budget - 1, bm, not_at_start); | 2104 ChoiceNode::FillInBMInfo(offset, budget - 1, bm, not_at_start); |
| 2188 SaveBMInfo(bm, not_at_start, offset); | 2105 SaveBMInfo(bm, not_at_start, offset); |
| 2189 } | 2106 } |
| 2190 | 2107 |
| 2191 | |
| 2192 void ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, | 2108 void ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, |
| 2193 RegExpCompiler* compiler, | 2109 RegExpCompiler* compiler, |
| 2194 intptr_t characters_filled_in, | 2110 intptr_t characters_filled_in, |
| 2195 bool not_at_start) { | 2111 bool not_at_start) { |
| 2196 not_at_start = (not_at_start || not_at_start_); | 2112 not_at_start = (not_at_start || not_at_start_); |
| 2197 intptr_t choice_count = alternatives_->length(); | 2113 intptr_t choice_count = alternatives_->length(); |
| 2198 ASSERT(choice_count > 0); | 2114 ASSERT(choice_count > 0); |
| 2199 (*alternatives_)[0].node()->GetQuickCheckDetails( | 2115 (*alternatives_)[0].node()->GetQuickCheckDetails( |
| 2200 details, compiler, characters_filled_in, not_at_start); | 2116 details, compiler, characters_filled_in, not_at_start); |
| 2201 for (intptr_t i = 1; i < choice_count; i++) { | 2117 for (intptr_t i = 1; i < choice_count; i++) { |
| 2202 QuickCheckDetails new_details(details->characters()); | 2118 QuickCheckDetails new_details(details->characters()); |
| 2203 RegExpNode* node = (*alternatives_)[i].node(); | 2119 RegExpNode* node = (*alternatives_)[i].node(); |
| 2204 node->GetQuickCheckDetails(&new_details, compiler, characters_filled_in, | 2120 node->GetQuickCheckDetails(&new_details, compiler, characters_filled_in, |
| 2205 not_at_start); | 2121 not_at_start); |
| 2206 // Here we merge the quick match details of the two branches. | 2122 // Here we merge the quick match details of the two branches. |
| 2207 details->Merge(&new_details, characters_filled_in); | 2123 details->Merge(&new_details, characters_filled_in); |
| 2208 } | 2124 } |
| 2209 } | 2125 } |
| 2210 | 2126 |
| 2211 | |
| 2212 // Check for [0-9A-Z_a-z]. | 2127 // Check for [0-9A-Z_a-z]. |
| 2213 static void EmitWordCheck(RegExpMacroAssembler* assembler, | 2128 static void EmitWordCheck(RegExpMacroAssembler* assembler, |
| 2214 BlockLabel* word, | 2129 BlockLabel* word, |
| 2215 BlockLabel* non_word, | 2130 BlockLabel* non_word, |
| 2216 bool fall_through_on_word) { | 2131 bool fall_through_on_word) { |
| 2217 if (assembler->CheckSpecialCharacterClass( | 2132 if (assembler->CheckSpecialCharacterClass( |
| 2218 fall_through_on_word ? 'w' : 'W', | 2133 fall_through_on_word ? 'w' : 'W', |
| 2219 fall_through_on_word ? non_word : word)) { | 2134 fall_through_on_word ? non_word : word)) { |
| 2220 // Optimized implementation available. | 2135 // Optimized implementation available. |
| 2221 return; | 2136 return; |
| 2222 } | 2137 } |
| 2223 assembler->CheckCharacterGT('z', non_word); | 2138 assembler->CheckCharacterGT('z', non_word); |
| 2224 assembler->CheckCharacterLT('0', non_word); | 2139 assembler->CheckCharacterLT('0', non_word); |
| 2225 assembler->CheckCharacterGT('a' - 1, word); | 2140 assembler->CheckCharacterGT('a' - 1, word); |
| 2226 assembler->CheckCharacterLT('9' + 1, word); | 2141 assembler->CheckCharacterLT('9' + 1, word); |
| 2227 assembler->CheckCharacterLT('A', non_word); | 2142 assembler->CheckCharacterLT('A', non_word); |
| 2228 assembler->CheckCharacterLT('Z' + 1, word); | 2143 assembler->CheckCharacterLT('Z' + 1, word); |
| 2229 if (fall_through_on_word) { | 2144 if (fall_through_on_word) { |
| 2230 assembler->CheckNotCharacter('_', non_word); | 2145 assembler->CheckNotCharacter('_', non_word); |
| 2231 } else { | 2146 } else { |
| 2232 assembler->CheckCharacter('_', word); | 2147 assembler->CheckCharacter('_', word); |
| 2233 } | 2148 } |
| 2234 } | 2149 } |
| 2235 | 2150 |
| 2236 | |
| 2237 // Emit the code to check for a ^ in multiline mode (1-character lookbehind | 2151 // Emit the code to check for a ^ in multiline mode (1-character lookbehind |
| 2238 // that matches newline or the start of input). | 2152 // that matches newline or the start of input). |
| 2239 static void EmitHat(RegExpCompiler* compiler, | 2153 static void EmitHat(RegExpCompiler* compiler, |
| 2240 RegExpNode* on_success, | 2154 RegExpNode* on_success, |
| 2241 Trace* trace) { | 2155 Trace* trace) { |
| 2242 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 2156 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
| 2243 // We will be loading the previous character into the current character | 2157 // We will be loading the previous character into the current character |
| 2244 // register. | 2158 // register. |
| 2245 Trace new_trace(*trace); | 2159 Trace new_trace(*trace); |
| 2246 new_trace.InvalidateCurrentCharacter(); | 2160 new_trace.InvalidateCurrentCharacter(); |
| (...skipping 13 matching lines...) Expand all Loading... |
| 2260 if (!compiler->one_byte()) { | 2174 if (!compiler->one_byte()) { |
| 2261 assembler->CheckCharacterAfterAnd(0x2028, 0xfffe, &ok); | 2175 assembler->CheckCharacterAfterAnd(0x2028, 0xfffe, &ok); |
| 2262 } | 2176 } |
| 2263 assembler->CheckCharacter('\n', &ok); | 2177 assembler->CheckCharacter('\n', &ok); |
| 2264 assembler->CheckNotCharacter('\r', new_trace.backtrack()); | 2178 assembler->CheckNotCharacter('\r', new_trace.backtrack()); |
| 2265 } | 2179 } |
| 2266 assembler->BindBlock(&ok); | 2180 assembler->BindBlock(&ok); |
| 2267 on_success->Emit(compiler, &new_trace); | 2181 on_success->Emit(compiler, &new_trace); |
| 2268 } | 2182 } |
| 2269 | 2183 |
| 2270 | |
| 2271 // Emit the code to handle \b and \B (word-boundary or non-word-boundary). | 2184 // Emit the code to handle \b and \B (word-boundary or non-word-boundary). |
| 2272 void AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) { | 2185 void AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) { |
| 2273 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 2186 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
| 2274 Trace::TriBool next_is_word_character = Trace::UNKNOWN; | 2187 Trace::TriBool next_is_word_character = Trace::UNKNOWN; |
| 2275 bool not_at_start = (trace->at_start() == Trace::FALSE_VALUE); | 2188 bool not_at_start = (trace->at_start() == Trace::FALSE_VALUE); |
| 2276 BoyerMooreLookahead* lookahead = bm_info(not_at_start); | 2189 BoyerMooreLookahead* lookahead = bm_info(not_at_start); |
| 2277 if (lookahead == NULL) { | 2190 if (lookahead == NULL) { |
| 2278 intptr_t eats_at_least = | 2191 intptr_t eats_at_least = |
| 2279 Utils::Minimum(kMaxLookaheadForBoyerMoore, | 2192 Utils::Minimum(kMaxLookaheadForBoyerMoore, |
| 2280 EatsAtLeast(kMaxLookaheadForBoyerMoore, kRecursionBudget, | 2193 EatsAtLeast(kMaxLookaheadForBoyerMoore, kRecursionBudget, |
| (...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2315 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord); | 2228 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord); |
| 2316 assembler->BindBlock(&ok); | 2229 assembler->BindBlock(&ok); |
| 2317 } else if (next_is_word_character == Trace::TRUE_VALUE) { | 2230 } else if (next_is_word_character == Trace::TRUE_VALUE) { |
| 2318 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord); | 2231 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord); |
| 2319 } else { | 2232 } else { |
| 2320 ASSERT(next_is_word_character == Trace::FALSE_VALUE); | 2233 ASSERT(next_is_word_character == Trace::FALSE_VALUE); |
| 2321 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord); | 2234 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord); |
| 2322 } | 2235 } |
| 2323 } | 2236 } |
| 2324 | 2237 |
| 2325 | |
| 2326 void AssertionNode::BacktrackIfPrevious( | 2238 void AssertionNode::BacktrackIfPrevious( |
| 2327 RegExpCompiler* compiler, | 2239 RegExpCompiler* compiler, |
| 2328 Trace* trace, | 2240 Trace* trace, |
| 2329 AssertionNode::IfPrevious backtrack_if_previous) { | 2241 AssertionNode::IfPrevious backtrack_if_previous) { |
| 2330 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 2242 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
| 2331 Trace new_trace(*trace); | 2243 Trace new_trace(*trace); |
| 2332 new_trace.InvalidateCurrentCharacter(); | 2244 new_trace.InvalidateCurrentCharacter(); |
| 2333 | 2245 |
| 2334 BlockLabel fall_through, dummy; | 2246 BlockLabel fall_through, dummy; |
| 2335 | 2247 |
| (...skipping 11 matching lines...) Expand all Loading... |
| 2347 } | 2259 } |
| 2348 // We already checked that we are not at the start of input so it must be | 2260 // We already checked that we are not at the start of input so it must be |
| 2349 // OK to load the previous character. | 2261 // OK to load the previous character. |
| 2350 assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, &dummy, false); | 2262 assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, &dummy, false); |
| 2351 EmitWordCheck(assembler, word, non_word, backtrack_if_previous == kIsNonWord); | 2263 EmitWordCheck(assembler, word, non_word, backtrack_if_previous == kIsNonWord); |
| 2352 | 2264 |
| 2353 assembler->BindBlock(&fall_through); | 2265 assembler->BindBlock(&fall_through); |
| 2354 on_success()->Emit(compiler, &new_trace); | 2266 on_success()->Emit(compiler, &new_trace); |
| 2355 } | 2267 } |
| 2356 | 2268 |
| 2357 | |
| 2358 void AssertionNode::GetQuickCheckDetails(QuickCheckDetails* details, | 2269 void AssertionNode::GetQuickCheckDetails(QuickCheckDetails* details, |
| 2359 RegExpCompiler* compiler, | 2270 RegExpCompiler* compiler, |
| 2360 intptr_t filled_in, | 2271 intptr_t filled_in, |
| 2361 bool not_at_start) { | 2272 bool not_at_start) { |
| 2362 if (assertion_type_ == AT_START && not_at_start) { | 2273 if (assertion_type_ == AT_START && not_at_start) { |
| 2363 details->set_cannot_match(); | 2274 details->set_cannot_match(); |
| 2364 return; | 2275 return; |
| 2365 } | 2276 } |
| 2366 return on_success()->GetQuickCheckDetails(details, compiler, filled_in, | 2277 return on_success()->GetQuickCheckDetails(details, compiler, filled_in, |
| 2367 not_at_start); | 2278 not_at_start); |
| 2368 } | 2279 } |
| 2369 | 2280 |
| 2370 | |
| 2371 void AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace) { | 2281 void AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace) { |
| 2372 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 2282 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
| 2373 switch (assertion_type_) { | 2283 switch (assertion_type_) { |
| 2374 case AT_END: { | 2284 case AT_END: { |
| 2375 BlockLabel ok; | 2285 BlockLabel ok; |
| 2376 assembler->CheckPosition(trace->cp_offset(), &ok); | 2286 assembler->CheckPosition(trace->cp_offset(), &ok); |
| 2377 assembler->GoTo(trace->backtrack()); | 2287 assembler->GoTo(trace->backtrack()); |
| 2378 assembler->BindBlock(&ok); | 2288 assembler->BindBlock(&ok); |
| 2379 break; | 2289 break; |
| 2380 } | 2290 } |
| (...skipping 15 matching lines...) Expand all Loading... |
| 2396 return; | 2306 return; |
| 2397 case AT_BOUNDARY: | 2307 case AT_BOUNDARY: |
| 2398 case AT_NON_BOUNDARY: { | 2308 case AT_NON_BOUNDARY: { |
| 2399 EmitBoundaryCheck(compiler, trace); | 2309 EmitBoundaryCheck(compiler, trace); |
| 2400 return; | 2310 return; |
| 2401 } | 2311 } |
| 2402 } | 2312 } |
| 2403 on_success()->Emit(compiler, trace); | 2313 on_success()->Emit(compiler, trace); |
| 2404 } | 2314 } |
| 2405 | 2315 |
| 2406 | |
| 2407 static bool DeterminedAlready(QuickCheckDetails* quick_check, intptr_t offset) { | 2316 static bool DeterminedAlready(QuickCheckDetails* quick_check, intptr_t offset) { |
| 2408 if (quick_check == NULL) return false; | 2317 if (quick_check == NULL) return false; |
| 2409 if (offset >= quick_check->characters()) return false; | 2318 if (offset >= quick_check->characters()) return false; |
| 2410 return quick_check->positions(offset)->determines_perfectly; | 2319 return quick_check->positions(offset)->determines_perfectly; |
| 2411 } | 2320 } |
| 2412 | 2321 |
| 2413 | |
| 2414 static void UpdateBoundsCheck(intptr_t index, intptr_t* checked_up_to) { | 2322 static void UpdateBoundsCheck(intptr_t index, intptr_t* checked_up_to) { |
| 2415 if (index > *checked_up_to) { | 2323 if (index > *checked_up_to) { |
| 2416 *checked_up_to = index; | 2324 *checked_up_to = index; |
| 2417 } | 2325 } |
| 2418 } | 2326 } |
| 2419 | 2327 |
| 2420 | |
| 2421 // We call this repeatedly to generate code for each pass over the text node. | 2328 // We call this repeatedly to generate code for each pass over the text node. |
| 2422 // The passes are in increasing order of difficulty because we hope one | 2329 // The passes are in increasing order of difficulty because we hope one |
| 2423 // of the first passes will fail in which case we are saved the work of the | 2330 // of the first passes will fail in which case we are saved the work of the |
| 2424 // later passes. for example for the case independent regexp /%[asdfghjkl]a/ | 2331 // later passes. for example for the case independent regexp /%[asdfghjkl]a/ |
| 2425 // we will check the '%' in the first pass, the case independent 'a' in the | 2332 // we will check the '%' in the first pass, the case independent 'a' in the |
| 2426 // second pass and the character class in the last pass. | 2333 // second pass and the character class in the last pass. |
| 2427 // | 2334 // |
| 2428 // The passes are done from right to left, so for example to test for /bar/ | 2335 // The passes are done from right to left, so for example to test for /bar/ |
| 2429 // we will first test for an 'r' with offset 2, then an 'a' with offset 1 | 2336 // we will first test for an 'r' with offset 2, then an 'a' with offset 1 |
| 2430 // and then a 'b' with offset 0. This means we can avoid the end-of-input | 2337 // and then a 'b' with offset 0. This means we can avoid the end-of-input |
| (...skipping 70 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2501 if (DeterminedAlready(quick_check, elm.cp_offset())) continue; | 2408 if (DeterminedAlready(quick_check, elm.cp_offset())) continue; |
| 2502 RegExpCharacterClass* cc = elm.char_class(); | 2409 RegExpCharacterClass* cc = elm.char_class(); |
| 2503 EmitCharClass(assembler, cc, one_byte, backtrack, cp_offset, | 2410 EmitCharClass(assembler, cc, one_byte, backtrack, cp_offset, |
| 2504 *checked_up_to < cp_offset, preloaded, Z); | 2411 *checked_up_to < cp_offset, preloaded, Z); |
| 2505 UpdateBoundsCheck(cp_offset, checked_up_to); | 2412 UpdateBoundsCheck(cp_offset, checked_up_to); |
| 2506 } | 2413 } |
| 2507 } | 2414 } |
| 2508 } | 2415 } |
| 2509 } | 2416 } |
| 2510 | 2417 |
| 2511 | |
| 2512 intptr_t TextNode::Length() { | 2418 intptr_t TextNode::Length() { |
| 2513 TextElement elm = elms_->Last(); | 2419 TextElement elm = elms_->Last(); |
| 2514 ASSERT(elm.cp_offset() >= 0); | 2420 ASSERT(elm.cp_offset() >= 0); |
| 2515 return elm.cp_offset() + elm.length(); | 2421 return elm.cp_offset() + elm.length(); |
| 2516 } | 2422 } |
| 2517 | 2423 |
| 2518 | |
| 2519 bool TextNode::SkipPass(intptr_t intptr_t_pass, bool ignore_case) { | 2424 bool TextNode::SkipPass(intptr_t intptr_t_pass, bool ignore_case) { |
| 2520 TextEmitPassType pass = static_cast<TextEmitPassType>(intptr_t_pass); | 2425 TextEmitPassType pass = static_cast<TextEmitPassType>(intptr_t_pass); |
| 2521 if (ignore_case) { | 2426 if (ignore_case) { |
| 2522 return pass == SIMPLE_CHARACTER_MATCH; | 2427 return pass == SIMPLE_CHARACTER_MATCH; |
| 2523 } else { | 2428 } else { |
| 2524 return pass == NON_LETTER_CHARACTER_MATCH || pass == CASE_CHARACTER_MATCH; | 2429 return pass == NON_LETTER_CHARACTER_MATCH || pass == CASE_CHARACTER_MATCH; |
| 2525 } | 2430 } |
| 2526 } | 2431 } |
| 2527 | 2432 |
| 2528 | |
| 2529 // This generates the code to match a text node. A text node can contain | 2433 // This generates the code to match a text node. A text node can contain |
| 2530 // straight character sequences (possibly to be matched in a case-independent | 2434 // straight character sequences (possibly to be matched in a case-independent |
| 2531 // way) and character classes. For efficiency we do not do this in a single | 2435 // way) and character classes. For efficiency we do not do this in a single |
| 2532 // pass from left to right. Instead we pass over the text node several times, | 2436 // pass from left to right. Instead we pass over the text node several times, |
| 2533 // emitting code for some character positions every time. See the comment on | 2437 // emitting code for some character positions every time. See the comment on |
| 2534 // TextEmitPass for details. | 2438 // TextEmitPass for details. |
| 2535 void TextNode::Emit(RegExpCompiler* compiler, Trace* trace) { | 2439 void TextNode::Emit(RegExpCompiler* compiler, Trace* trace) { |
| 2536 LimitResult limit_result = LimitVersions(compiler, trace); | 2440 LimitResult limit_result = LimitVersions(compiler, trace); |
| 2537 if (limit_result == DONE) return; | 2441 if (limit_result == DONE) return; |
| 2538 ASSERT(limit_result == CONTINUE); | 2442 ASSERT(limit_result == CONTINUE); |
| (...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2570 } | 2474 } |
| 2571 } | 2475 } |
| 2572 | 2476 |
| 2573 Trace successor_trace(*trace); | 2477 Trace successor_trace(*trace); |
| 2574 successor_trace.set_at_start(false); | 2478 successor_trace.set_at_start(false); |
| 2575 successor_trace.AdvanceCurrentPositionInTrace(Length(), compiler); | 2479 successor_trace.AdvanceCurrentPositionInTrace(Length(), compiler); |
| 2576 RecursionCheck rc(compiler); | 2480 RecursionCheck rc(compiler); |
| 2577 on_success()->Emit(compiler, &successor_trace); | 2481 on_success()->Emit(compiler, &successor_trace); |
| 2578 } | 2482 } |
| 2579 | 2483 |
| 2580 | |
| 2581 void Trace::InvalidateCurrentCharacter() { | 2484 void Trace::InvalidateCurrentCharacter() { |
| 2582 characters_preloaded_ = 0; | 2485 characters_preloaded_ = 0; |
| 2583 } | 2486 } |
| 2584 | 2487 |
| 2585 | |
| 2586 void Trace::AdvanceCurrentPositionInTrace(intptr_t by, | 2488 void Trace::AdvanceCurrentPositionInTrace(intptr_t by, |
| 2587 RegExpCompiler* compiler) { | 2489 RegExpCompiler* compiler) { |
| 2588 ASSERT(by > 0); | 2490 ASSERT(by > 0); |
| 2589 // We don't have an instruction for shifting the current character register | 2491 // We don't have an instruction for shifting the current character register |
| 2590 // down or for using a shifted value for anything so lets just forget that | 2492 // down or for using a shifted value for anything so lets just forget that |
| 2591 // we preloaded any characters into it. | 2493 // we preloaded any characters into it. |
| 2592 characters_preloaded_ = 0; | 2494 characters_preloaded_ = 0; |
| 2593 // Adjust the offsets of the quick check performed information. This | 2495 // Adjust the offsets of the quick check performed information. This |
| 2594 // information is used to find out what we already determined about the | 2496 // information is used to find out what we already determined about the |
| 2595 // characters by means of mask and compare. | 2497 // characters by means of mask and compare. |
| 2596 quick_check_performed_.Advance(by, compiler->one_byte()); | 2498 quick_check_performed_.Advance(by, compiler->one_byte()); |
| 2597 cp_offset_ += by; | 2499 cp_offset_ += by; |
| 2598 if (cp_offset_ > RegExpMacroAssembler::kMaxCPOffset) { | 2500 if (cp_offset_ > RegExpMacroAssembler::kMaxCPOffset) { |
| 2599 compiler->SetRegExpTooBig(); | 2501 compiler->SetRegExpTooBig(); |
| 2600 cp_offset_ = 0; | 2502 cp_offset_ = 0; |
| 2601 } | 2503 } |
| 2602 bound_checked_up_to_ = | 2504 bound_checked_up_to_ = |
| 2603 Utils::Maximum(static_cast<intptr_t>(0), bound_checked_up_to_ - by); | 2505 Utils::Maximum(static_cast<intptr_t>(0), bound_checked_up_to_ - by); |
| 2604 } | 2506 } |
| 2605 | 2507 |
| 2606 | |
| 2607 void TextNode::MakeCaseIndependent(bool is_one_byte) { | 2508 void TextNode::MakeCaseIndependent(bool is_one_byte) { |
| 2608 intptr_t element_count = elms_->length(); | 2509 intptr_t element_count = elms_->length(); |
| 2609 for (intptr_t i = 0; i < element_count; i++) { | 2510 for (intptr_t i = 0; i < element_count; i++) { |
| 2610 TextElement elm = elms_->At(i); | 2511 TextElement elm = elms_->At(i); |
| 2611 if (elm.text_type() == TextElement::CHAR_CLASS) { | 2512 if (elm.text_type() == TextElement::CHAR_CLASS) { |
| 2612 RegExpCharacterClass* cc = elm.char_class(); | 2513 RegExpCharacterClass* cc = elm.char_class(); |
| 2613 // None of the standard character classes is different in the case | 2514 // None of the standard character classes is different in the case |
| 2614 // independent case and it slows us down if we don't know that. | 2515 // independent case and it slows us down if we don't know that. |
| 2615 if (cc->is_standard()) continue; | 2516 if (cc->is_standard()) continue; |
| 2616 ZoneGrowableArray<CharacterRange>* ranges = cc->ranges(); | 2517 ZoneGrowableArray<CharacterRange>* ranges = cc->ranges(); |
| 2617 intptr_t range_count = ranges->length(); | 2518 intptr_t range_count = ranges->length(); |
| 2618 for (intptr_t j = 0; j < range_count; j++) { | 2519 for (intptr_t j = 0; j < range_count; j++) { |
| 2619 (*ranges)[j].AddCaseEquivalents(ranges, is_one_byte, Z); | 2520 (*ranges)[j].AddCaseEquivalents(ranges, is_one_byte, Z); |
| 2620 } | 2521 } |
| 2621 } | 2522 } |
| 2622 } | 2523 } |
| 2623 } | 2524 } |
| 2624 | 2525 |
| 2625 | |
| 2626 intptr_t TextNode::GreedyLoopTextLength() { | 2526 intptr_t TextNode::GreedyLoopTextLength() { |
| 2627 TextElement elm = elms_->At(elms_->length() - 1); | 2527 TextElement elm = elms_->At(elms_->length() - 1); |
| 2628 return elm.cp_offset() + elm.length(); | 2528 return elm.cp_offset() + elm.length(); |
| 2629 } | 2529 } |
| 2630 | 2530 |
| 2631 | |
| 2632 RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode( | 2531 RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode( |
| 2633 RegExpCompiler* compiler) { | 2532 RegExpCompiler* compiler) { |
| 2634 if (elms_->length() != 1) return NULL; | 2533 if (elms_->length() != 1) return NULL; |
| 2635 TextElement elm = elms_->At(0); | 2534 TextElement elm = elms_->At(0); |
| 2636 if (elm.text_type() != TextElement::CHAR_CLASS) return NULL; | 2535 if (elm.text_type() != TextElement::CHAR_CLASS) return NULL; |
| 2637 RegExpCharacterClass* node = elm.char_class(); | 2536 RegExpCharacterClass* node = elm.char_class(); |
| 2638 ZoneGrowableArray<CharacterRange>* ranges = node->ranges(); | 2537 ZoneGrowableArray<CharacterRange>* ranges = node->ranges(); |
| 2639 if (!CharacterRange::IsCanonical(ranges)) { | 2538 if (!CharacterRange::IsCanonical(ranges)) { |
| 2640 CharacterRange::Canonicalize(ranges); | 2539 CharacterRange::Canonicalize(ranges); |
| 2641 } | 2540 } |
| 2642 if (node->is_negated()) { | 2541 if (node->is_negated()) { |
| 2643 return ranges->length() == 0 ? on_success() : NULL; | 2542 return ranges->length() == 0 ? on_success() : NULL; |
| 2644 } | 2543 } |
| 2645 if (ranges->length() != 1) return NULL; | 2544 if (ranges->length() != 1) return NULL; |
| 2646 uint32_t max_char; | 2545 uint32_t max_char; |
| 2647 if (compiler->one_byte()) { | 2546 if (compiler->one_byte()) { |
| 2648 max_char = Symbols::kMaxOneCharCodeSymbol; | 2547 max_char = Symbols::kMaxOneCharCodeSymbol; |
| 2649 } else { | 2548 } else { |
| 2650 max_char = Utf16::kMaxCodeUnit; | 2549 max_char = Utf16::kMaxCodeUnit; |
| 2651 } | 2550 } |
| 2652 return ranges->At(0).IsEverything(max_char) ? on_success() : NULL; | 2551 return ranges->At(0).IsEverything(max_char) ? on_success() : NULL; |
| 2653 } | 2552 } |
| 2654 | 2553 |
| 2655 | |
| 2656 // Finds the fixed match length of a sequence of nodes that goes from | 2554 // Finds the fixed match length of a sequence of nodes that goes from |
| 2657 // this alternative and back to this choice node. If there are variable | 2555 // this alternative and back to this choice node. If there are variable |
| 2658 // length nodes or other complications in the way then return a sentinel | 2556 // length nodes or other complications in the way then return a sentinel |
| 2659 // value indicating that a greedy loop cannot be constructed. | 2557 // value indicating that a greedy loop cannot be constructed. |
| 2660 intptr_t ChoiceNode::GreedyLoopTextLengthForAlternative( | 2558 intptr_t ChoiceNode::GreedyLoopTextLengthForAlternative( |
| 2661 GuardedAlternative* alternative) { | 2559 GuardedAlternative* alternative) { |
| 2662 intptr_t length = 0; | 2560 intptr_t length = 0; |
| 2663 RegExpNode* node = alternative->node(); | 2561 RegExpNode* node = alternative->node(); |
| 2664 // Later we will generate code for all these text nodes using recursion | 2562 // Later we will generate code for all these text nodes using recursion |
| 2665 // so we have to limit the max number. | 2563 // so we have to limit the max number. |
| 2666 intptr_t recursion_depth = 0; | 2564 intptr_t recursion_depth = 0; |
| 2667 while (node != this) { | 2565 while (node != this) { |
| 2668 if (recursion_depth++ > RegExpCompiler::kMaxRecursion) { | 2566 if (recursion_depth++ > RegExpCompiler::kMaxRecursion) { |
| 2669 return kNodeIsTooComplexForGreedyLoops; | 2567 return kNodeIsTooComplexForGreedyLoops; |
| 2670 } | 2568 } |
| 2671 intptr_t node_length = node->GreedyLoopTextLength(); | 2569 intptr_t node_length = node->GreedyLoopTextLength(); |
| 2672 if (node_length == kNodeIsTooComplexForGreedyLoops) { | 2570 if (node_length == kNodeIsTooComplexForGreedyLoops) { |
| 2673 return kNodeIsTooComplexForGreedyLoops; | 2571 return kNodeIsTooComplexForGreedyLoops; |
| 2674 } | 2572 } |
| 2675 length += node_length; | 2573 length += node_length; |
| 2676 SeqRegExpNode* seq_node = static_cast<SeqRegExpNode*>(node); | 2574 SeqRegExpNode* seq_node = static_cast<SeqRegExpNode*>(node); |
| 2677 node = seq_node->on_success(); | 2575 node = seq_node->on_success(); |
| 2678 } | 2576 } |
| 2679 return length; | 2577 return length; |
| 2680 } | 2578 } |
| 2681 | 2579 |
| 2682 | |
| 2683 void LoopChoiceNode::AddLoopAlternative(GuardedAlternative alt) { | 2580 void LoopChoiceNode::AddLoopAlternative(GuardedAlternative alt) { |
| 2684 ASSERT(loop_node_ == NULL); | 2581 ASSERT(loop_node_ == NULL); |
| 2685 AddAlternative(alt); | 2582 AddAlternative(alt); |
| 2686 loop_node_ = alt.node(); | 2583 loop_node_ = alt.node(); |
| 2687 } | 2584 } |
| 2688 | 2585 |
| 2689 | |
| 2690 void LoopChoiceNode::AddContinueAlternative(GuardedAlternative alt) { | 2586 void LoopChoiceNode::AddContinueAlternative(GuardedAlternative alt) { |
| 2691 ASSERT(continue_node_ == NULL); | 2587 ASSERT(continue_node_ == NULL); |
| 2692 AddAlternative(alt); | 2588 AddAlternative(alt); |
| 2693 continue_node_ = alt.node(); | 2589 continue_node_ = alt.node(); |
| 2694 } | 2590 } |
| 2695 | 2591 |
| 2696 | |
| 2697 void LoopChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) { | 2592 void LoopChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) { |
| 2698 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); | 2593 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); |
| 2699 if (trace->stop_node() == this) { | 2594 if (trace->stop_node() == this) { |
| 2700 // Back edge of greedy optimized loop node graph. | 2595 // Back edge of greedy optimized loop node graph. |
| 2701 intptr_t text_length = | 2596 intptr_t text_length = |
| 2702 GreedyLoopTextLengthForAlternative(&((*alternatives_)[0])); | 2597 GreedyLoopTextLengthForAlternative(&((*alternatives_)[0])); |
| 2703 ASSERT(text_length != kNodeIsTooComplexForGreedyLoops); | 2598 ASSERT(text_length != kNodeIsTooComplexForGreedyLoops); |
| 2704 // Update the counter-based backtracking info on the stack. This is an | 2599 // Update the counter-based backtracking info on the stack. This is an |
| 2705 // optimization for greedy loops (see below). | 2600 // optimization for greedy loops (see below). |
| 2706 ASSERT(trace->cp_offset() == text_length); | 2601 ASSERT(trace->cp_offset() == text_length); |
| 2707 macro_assembler->AdvanceCurrentPosition(text_length); | 2602 macro_assembler->AdvanceCurrentPosition(text_length); |
| 2708 macro_assembler->GoTo(trace->loop_label()); | 2603 macro_assembler->GoTo(trace->loop_label()); |
| 2709 return; | 2604 return; |
| 2710 } | 2605 } |
| 2711 ASSERT(trace->stop_node() == NULL); | 2606 ASSERT(trace->stop_node() == NULL); |
| 2712 if (!trace->is_trivial()) { | 2607 if (!trace->is_trivial()) { |
| 2713 trace->Flush(compiler, this); | 2608 trace->Flush(compiler, this); |
| 2714 return; | 2609 return; |
| 2715 } | 2610 } |
| 2716 ChoiceNode::Emit(compiler, trace); | 2611 ChoiceNode::Emit(compiler, trace); |
| 2717 } | 2612 } |
| 2718 | 2613 |
| 2719 | |
| 2720 intptr_t ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler, | 2614 intptr_t ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler, |
| 2721 intptr_t eats_at_least) { | 2615 intptr_t eats_at_least) { |
| 2722 intptr_t preload_characters = | 2616 intptr_t preload_characters = |
| 2723 Utils::Minimum(static_cast<intptr_t>(4), eats_at_least); | 2617 Utils::Minimum(static_cast<intptr_t>(4), eats_at_least); |
| 2724 if (compiler->macro_assembler()->CanReadUnaligned()) { | 2618 if (compiler->macro_assembler()->CanReadUnaligned()) { |
| 2725 bool one_byte = compiler->one_byte(); | 2619 bool one_byte = compiler->one_byte(); |
| 2726 if (one_byte) { | 2620 if (one_byte) { |
| 2727 if (preload_characters > 4) preload_characters = 4; | 2621 if (preload_characters > 4) preload_characters = 4; |
| 2728 // We can't preload 3 characters because there is no machine instruction | 2622 // We can't preload 3 characters because there is no machine instruction |
| 2729 // to do that. We can't just load 4 because we could be reading | 2623 // to do that. We can't just load 4 because we could be reading |
| 2730 // beyond the end of the string, which could cause a memory fault. | 2624 // beyond the end of the string, which could cause a memory fault. |
| 2731 if (preload_characters == 3) preload_characters = 2; | 2625 if (preload_characters == 3) preload_characters = 2; |
| 2732 } else { | 2626 } else { |
| 2733 if (preload_characters > 2) preload_characters = 2; | 2627 if (preload_characters > 2) preload_characters = 2; |
| 2734 } | 2628 } |
| 2735 } else { | 2629 } else { |
| 2736 if (preload_characters > 1) preload_characters = 1; | 2630 if (preload_characters > 1) preload_characters = 1; |
| 2737 } | 2631 } |
| 2738 return preload_characters; | 2632 return preload_characters; |
| 2739 } | 2633 } |
| 2740 | 2634 |
| 2741 | |
| 2742 // This structure is used when generating the alternatives in a choice node. It | 2635 // This structure is used when generating the alternatives in a choice node. It |
| 2743 // records the way the alternative is being code generated. | 2636 // records the way the alternative is being code generated. |
| 2744 struct AlternativeGeneration { | 2637 struct AlternativeGeneration { |
| 2745 AlternativeGeneration() | 2638 AlternativeGeneration() |
| 2746 : possible_success(), | 2639 : possible_success(), |
| 2747 expects_preload(false), | 2640 expects_preload(false), |
| 2748 after(), | 2641 after(), |
| 2749 quick_check_details() {} | 2642 quick_check_details() {} |
| 2750 BlockLabel possible_success; | 2643 BlockLabel possible_success; |
| 2751 bool expects_preload; | 2644 bool expects_preload; |
| 2752 BlockLabel after; | 2645 BlockLabel after; |
| 2753 QuickCheckDetails quick_check_details; | 2646 QuickCheckDetails quick_check_details; |
| 2754 }; | 2647 }; |
| 2755 | 2648 |
| 2756 | |
| 2757 // Creates a list of AlternativeGenerations. If the list has a reasonable | 2649 // Creates a list of AlternativeGenerations. If the list has a reasonable |
| 2758 // size then it is on the stack, otherwise the excess is on the heap. | 2650 // size then it is on the stack, otherwise the excess is on the heap. |
| 2759 class AlternativeGenerationList { | 2651 class AlternativeGenerationList { |
| 2760 public: | 2652 public: |
| 2761 explicit AlternativeGenerationList(intptr_t count) : alt_gens_(count) { | 2653 explicit AlternativeGenerationList(intptr_t count) : alt_gens_(count) { |
| 2762 for (intptr_t i = 0; i < count && i < kAFew; i++) { | 2654 for (intptr_t i = 0; i < count && i < kAFew; i++) { |
| 2763 alt_gens_.Add(a_few_alt_gens_ + i); | 2655 alt_gens_.Add(a_few_alt_gens_ + i); |
| 2764 } | 2656 } |
| 2765 for (intptr_t i = kAFew; i < count; i++) { | 2657 for (intptr_t i = kAFew; i < count; i++) { |
| 2766 alt_gens_.Add(new AlternativeGeneration()); | 2658 alt_gens_.Add(new AlternativeGeneration()); |
| 2767 } | 2659 } |
| 2768 } | 2660 } |
| 2769 ~AlternativeGenerationList() { | 2661 ~AlternativeGenerationList() { |
| 2770 for (intptr_t i = kAFew; i < alt_gens_.length(); i++) { | 2662 for (intptr_t i = kAFew; i < alt_gens_.length(); i++) { |
| 2771 delete alt_gens_[i]; | 2663 delete alt_gens_[i]; |
| 2772 alt_gens_[i] = NULL; | 2664 alt_gens_[i] = NULL; |
| 2773 } | 2665 } |
| 2774 } | 2666 } |
| 2775 | 2667 |
| 2776 AlternativeGeneration* at(intptr_t i) { return alt_gens_[i]; } | 2668 AlternativeGeneration* at(intptr_t i) { return alt_gens_[i]; } |
| 2777 | 2669 |
| 2778 private: | 2670 private: |
| 2779 static const intptr_t kAFew = 10; | 2671 static const intptr_t kAFew = 10; |
| 2780 GrowableArray<AlternativeGeneration*> alt_gens_; | 2672 GrowableArray<AlternativeGeneration*> alt_gens_; |
| 2781 AlternativeGeneration a_few_alt_gens_[kAFew]; | 2673 AlternativeGeneration a_few_alt_gens_[kAFew]; |
| 2782 | 2674 |
| 2783 DISALLOW_ALLOCATION(); | 2675 DISALLOW_ALLOCATION(); |
| 2784 }; | 2676 }; |
| 2785 | 2677 |
| 2786 | |
| 2787 // The '2' variant is inclusive from and exclusive to. | 2678 // The '2' variant is inclusive from and exclusive to. |
| 2788 // This covers \s as defined in ECMA-262 5.1, 15.10.2.12, | 2679 // This covers \s as defined in ECMA-262 5.1, 15.10.2.12, |
| 2789 // which include WhiteSpace (7.2) or LineTerminator (7.3) values. | 2680 // which include WhiteSpace (7.2) or LineTerminator (7.3) values. |
| 2790 static const intptr_t kSpaceRanges[] = { | 2681 static const intptr_t kSpaceRanges[] = { |
| 2791 '\t', '\r' + 1, ' ', ' ' + 1, 0x00A0, 0x00A1, 0x1680, 0x1681, | 2682 '\t', '\r' + 1, ' ', ' ' + 1, 0x00A0, 0x00A1, 0x1680, 0x1681, |
| 2792 0x180E, 0x180F, 0x2000, 0x200B, 0x2028, 0x202A, 0x202F, 0x2030, | 2683 0x180E, 0x180F, 0x2000, 0x200B, 0x2028, 0x202A, 0x202F, 0x2030, |
| 2793 0x205F, 0x2060, 0x3000, 0x3001, 0xFEFF, 0xFF00, 0x10000}; | 2684 0x205F, 0x2060, 0x3000, 0x3001, 0xFEFF, 0xFF00, 0x10000}; |
| 2794 static const intptr_t kSpaceRangeCount = ARRAY_SIZE(kSpaceRanges); | 2685 static const intptr_t kSpaceRangeCount = ARRAY_SIZE(kSpaceRanges); |
| 2795 static const intptr_t kWordRanges[] = {'0', '9' + 1, 'A', 'Z' + 1, '_', | 2686 static const intptr_t kWordRanges[] = {'0', '9' + 1, 'A', 'Z' + 1, '_', |
| 2796 '_' + 1, 'a', 'z' + 1, 0x10000}; | 2687 '_' + 1, 'a', 'z' + 1, 0x10000}; |
| 2797 static const intptr_t kWordRangeCount = ARRAY_SIZE(kWordRanges); | 2688 static const intptr_t kWordRangeCount = ARRAY_SIZE(kWordRanges); |
| 2798 static const intptr_t kDigitRanges[] = {'0', '9' + 1, 0x10000}; | 2689 static const intptr_t kDigitRanges[] = {'0', '9' + 1, 0x10000}; |
| 2799 static const intptr_t kDigitRangeCount = ARRAY_SIZE(kDigitRanges); | 2690 static const intptr_t kDigitRangeCount = ARRAY_SIZE(kDigitRanges); |
| 2800 static const intptr_t kSurrogateRanges[] = {0xd800, 0xe000, 0x10000}; | 2691 static const intptr_t kSurrogateRanges[] = {0xd800, 0xe000, 0x10000}; |
| 2801 static const intptr_t kSurrogateRangeCount = ARRAY_SIZE(kSurrogateRanges); | 2692 static const intptr_t kSurrogateRangeCount = ARRAY_SIZE(kSurrogateRanges); |
| 2802 static const intptr_t kLineTerminatorRanges[] = {0x000A, 0x000B, 0x000D, 0x000E, | 2693 static const intptr_t kLineTerminatorRanges[] = {0x000A, 0x000B, 0x000D, 0x000E, |
| 2803 0x2028, 0x202A, 0x10000}; | 2694 0x2028, 0x202A, 0x10000}; |
| 2804 static const intptr_t kLineTerminatorRangeCount = | 2695 static const intptr_t kLineTerminatorRangeCount = |
| 2805 ARRAY_SIZE(kLineTerminatorRanges); | 2696 ARRAY_SIZE(kLineTerminatorRanges); |
| 2806 | 2697 |
| 2807 | |
| 2808 void BoyerMoorePositionInfo::Set(intptr_t character) { | 2698 void BoyerMoorePositionInfo::Set(intptr_t character) { |
| 2809 SetInterval(Interval(character, character)); | 2699 SetInterval(Interval(character, character)); |
| 2810 } | 2700 } |
| 2811 | 2701 |
| 2812 | |
| 2813 void BoyerMoorePositionInfo::SetInterval(const Interval& interval) { | 2702 void BoyerMoorePositionInfo::SetInterval(const Interval& interval) { |
| 2814 s_ = AddRange(s_, kSpaceRanges, kSpaceRangeCount, interval); | 2703 s_ = AddRange(s_, kSpaceRanges, kSpaceRangeCount, interval); |
| 2815 w_ = AddRange(w_, kWordRanges, kWordRangeCount, interval); | 2704 w_ = AddRange(w_, kWordRanges, kWordRangeCount, interval); |
| 2816 d_ = AddRange(d_, kDigitRanges, kDigitRangeCount, interval); | 2705 d_ = AddRange(d_, kDigitRanges, kDigitRangeCount, interval); |
| 2817 surrogate_ = | 2706 surrogate_ = |
| 2818 AddRange(surrogate_, kSurrogateRanges, kSurrogateRangeCount, interval); | 2707 AddRange(surrogate_, kSurrogateRanges, kSurrogateRangeCount, interval); |
| 2819 if (interval.to() - interval.from() >= kMapSize - 1) { | 2708 if (interval.to() - interval.from() >= kMapSize - 1) { |
| 2820 if (map_count_ != kMapSize) { | 2709 if (map_count_ != kMapSize) { |
| 2821 map_count_ = kMapSize; | 2710 map_count_ = kMapSize; |
| 2822 for (intptr_t i = 0; i < kMapSize; i++) | 2711 for (intptr_t i = 0; i < kMapSize; i++) |
| 2823 (*map_)[i] = true; | 2712 (*map_)[i] = true; |
| 2824 } | 2713 } |
| 2825 return; | 2714 return; |
| 2826 } | 2715 } |
| 2827 for (intptr_t i = interval.from(); i <= interval.to(); i++) { | 2716 for (intptr_t i = interval.from(); i <= interval.to(); i++) { |
| 2828 intptr_t mod_character = (i & kMask); | 2717 intptr_t mod_character = (i & kMask); |
| 2829 if (!map_->At(mod_character)) { | 2718 if (!map_->At(mod_character)) { |
| 2830 map_count_++; | 2719 map_count_++; |
| 2831 (*map_)[mod_character] = true; | 2720 (*map_)[mod_character] = true; |
| 2832 } | 2721 } |
| 2833 if (map_count_ == kMapSize) return; | 2722 if (map_count_ == kMapSize) return; |
| 2834 } | 2723 } |
| 2835 } | 2724 } |
| 2836 | 2725 |
| 2837 | |
| 2838 void BoyerMoorePositionInfo::SetAll() { | 2726 void BoyerMoorePositionInfo::SetAll() { |
| 2839 s_ = w_ = d_ = kLatticeUnknown; | 2727 s_ = w_ = d_ = kLatticeUnknown; |
| 2840 if (map_count_ != kMapSize) { | 2728 if (map_count_ != kMapSize) { |
| 2841 map_count_ = kMapSize; | 2729 map_count_ = kMapSize; |
| 2842 for (intptr_t i = 0; i < kMapSize; i++) | 2730 for (intptr_t i = 0; i < kMapSize; i++) |
| 2843 (*map_)[i] = true; | 2731 (*map_)[i] = true; |
| 2844 } | 2732 } |
| 2845 } | 2733 } |
| 2846 | 2734 |
| 2847 | |
| 2848 BoyerMooreLookahead::BoyerMooreLookahead(intptr_t length, | 2735 BoyerMooreLookahead::BoyerMooreLookahead(intptr_t length, |
| 2849 RegExpCompiler* compiler, | 2736 RegExpCompiler* compiler, |
| 2850 Zone* zone) | 2737 Zone* zone) |
| 2851 : length_(length), compiler_(compiler) { | 2738 : length_(length), compiler_(compiler) { |
| 2852 if (compiler->one_byte()) { | 2739 if (compiler->one_byte()) { |
| 2853 max_char_ = Symbols::kMaxOneCharCodeSymbol; | 2740 max_char_ = Symbols::kMaxOneCharCodeSymbol; |
| 2854 } else { | 2741 } else { |
| 2855 max_char_ = Utf16::kMaxCodeUnit; | 2742 max_char_ = Utf16::kMaxCodeUnit; |
| 2856 } | 2743 } |
| 2857 bitmaps_ = new (zone) ZoneGrowableArray<BoyerMoorePositionInfo*>(length); | 2744 bitmaps_ = new (zone) ZoneGrowableArray<BoyerMoorePositionInfo*>(length); |
| 2858 for (intptr_t i = 0; i < length; i++) { | 2745 for (intptr_t i = 0; i < length; i++) { |
| 2859 bitmaps_->Add(new (zone) BoyerMoorePositionInfo(zone)); | 2746 bitmaps_->Add(new (zone) BoyerMoorePositionInfo(zone)); |
| 2860 } | 2747 } |
| 2861 } | 2748 } |
| 2862 | 2749 |
| 2863 | |
| 2864 // Find the longest range of lookahead that has the fewest number of different | 2750 // Find the longest range of lookahead that has the fewest number of different |
| 2865 // characters that can occur at a given position. Since we are optimizing two | 2751 // characters that can occur at a given position. Since we are optimizing two |
| 2866 // different parameters at once this is a tradeoff. | 2752 // different parameters at once this is a tradeoff. |
| 2867 bool BoyerMooreLookahead::FindWorthwhileInterval(intptr_t* from, intptr_t* to) { | 2753 bool BoyerMooreLookahead::FindWorthwhileInterval(intptr_t* from, intptr_t* to) { |
| 2868 intptr_t biggest_points = 0; | 2754 intptr_t biggest_points = 0; |
| 2869 // If more than 32 characters out of 128 can occur it is unlikely that we can | 2755 // If more than 32 characters out of 128 can occur it is unlikely that we can |
| 2870 // be lucky enough to step forwards much of the time. | 2756 // be lucky enough to step forwards much of the time. |
| 2871 const intptr_t kMaxMax = 32; | 2757 const intptr_t kMaxMax = 32; |
| 2872 for (intptr_t max_number_of_chars = 4; max_number_of_chars < kMaxMax; | 2758 for (intptr_t max_number_of_chars = 4; max_number_of_chars < kMaxMax; |
| 2873 max_number_of_chars *= 2) { | 2759 max_number_of_chars *= 2) { |
| 2874 biggest_points = | 2760 biggest_points = |
| 2875 FindBestInterval(max_number_of_chars, biggest_points, from, to); | 2761 FindBestInterval(max_number_of_chars, biggest_points, from, to); |
| 2876 } | 2762 } |
| 2877 if (biggest_points == 0) return false; | 2763 if (biggest_points == 0) return false; |
| 2878 return true; | 2764 return true; |
| 2879 } | 2765 } |
| 2880 | 2766 |
| 2881 | |
| 2882 // Find the highest-points range between 0 and length_ where the character | 2767 // Find the highest-points range between 0 and length_ where the character |
| 2883 // information is not too vague. 'Too vague' means that there are more than | 2768 // information is not too vague. 'Too vague' means that there are more than |
| 2884 // max_number_of_chars that can occur at this position. Calculates the number | 2769 // max_number_of_chars that can occur at this position. Calculates the number |
| 2885 // of points as the product of width-of-the-range and | 2770 // of points as the product of width-of-the-range and |
| 2886 // probability-of-finding-one-of-the-characters, where the probability is | 2771 // probability-of-finding-one-of-the-characters, where the probability is |
| 2887 // calculated using the frequency distribution of the sample subject string. | 2772 // calculated using the frequency distribution of the sample subject string. |
| 2888 intptr_t BoyerMooreLookahead::FindBestInterval(intptr_t max_number_of_chars, | 2773 intptr_t BoyerMooreLookahead::FindBestInterval(intptr_t max_number_of_chars, |
| 2889 intptr_t old_biggest_points, | 2774 intptr_t old_biggest_points, |
| 2890 intptr_t* from, | 2775 intptr_t* from, |
| 2891 intptr_t* to) { | 2776 intptr_t* to) { |
| (...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2931 intptr_t points = (i - remembered_from) * probability; | 2816 intptr_t points = (i - remembered_from) * probability; |
| 2932 if (points > biggest_points) { | 2817 if (points > biggest_points) { |
| 2933 *from = remembered_from; | 2818 *from = remembered_from; |
| 2934 *to = i - 1; | 2819 *to = i - 1; |
| 2935 biggest_points = points; | 2820 biggest_points = points; |
| 2936 } | 2821 } |
| 2937 } | 2822 } |
| 2938 return biggest_points; | 2823 return biggest_points; |
| 2939 } | 2824 } |
| 2940 | 2825 |
| 2941 | |
| 2942 // Take all the characters that will not prevent a successful match if they | 2826 // Take all the characters that will not prevent a successful match if they |
| 2943 // occur in the subject string in the range between min_lookahead and | 2827 // occur in the subject string in the range between min_lookahead and |
| 2944 // max_lookahead (inclusive) measured from the current position. If the | 2828 // max_lookahead (inclusive) measured from the current position. If the |
| 2945 // character at max_lookahead offset is not one of these characters, then we | 2829 // character at max_lookahead offset is not one of these characters, then we |
| 2946 // can safely skip forwards by the number of characters in the range. | 2830 // can safely skip forwards by the number of characters in the range. |
| 2947 intptr_t BoyerMooreLookahead::GetSkipTable( | 2831 intptr_t BoyerMooreLookahead::GetSkipTable( |
| 2948 intptr_t min_lookahead, | 2832 intptr_t min_lookahead, |
| 2949 intptr_t max_lookahead, | 2833 intptr_t max_lookahead, |
| 2950 const TypedData& boolean_skip_table) { | 2834 const TypedData& boolean_skip_table) { |
| 2951 const intptr_t kSize = RegExpMacroAssembler::kTableSize; | 2835 const intptr_t kSize = RegExpMacroAssembler::kTableSize; |
| (...skipping 11 matching lines...) Expand all Loading... |
| 2963 for (intptr_t j = 0; j < kSize; j++) { | 2847 for (intptr_t j = 0; j < kSize; j++) { |
| 2964 if (map->at(j)) { | 2848 if (map->at(j)) { |
| 2965 boolean_skip_table.SetUint8(j, kDontSkipArrayEntry); | 2849 boolean_skip_table.SetUint8(j, kDontSkipArrayEntry); |
| 2966 } | 2850 } |
| 2967 } | 2851 } |
| 2968 } | 2852 } |
| 2969 | 2853 |
| 2970 return skip; | 2854 return skip; |
| 2971 } | 2855 } |
| 2972 | 2856 |
| 2973 | |
| 2974 // See comment above on the implementation of GetSkipTable. | 2857 // See comment above on the implementation of GetSkipTable. |
| 2975 void BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) { | 2858 void BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) { |
| 2976 const intptr_t kSize = RegExpMacroAssembler::kTableSize; | 2859 const intptr_t kSize = RegExpMacroAssembler::kTableSize; |
| 2977 | 2860 |
| 2978 intptr_t min_lookahead = 0; | 2861 intptr_t min_lookahead = 0; |
| 2979 intptr_t max_lookahead = 0; | 2862 intptr_t max_lookahead = 0; |
| 2980 | 2863 |
| 2981 if (!FindWorthwhileInterval(&min_lookahead, &max_lookahead)) return; | 2864 if (!FindWorthwhileInterval(&min_lookahead, &max_lookahead)) return; |
| 2982 | 2865 |
| 2983 bool found_single_character = false; | 2866 bool found_single_character = false; |
| (...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3034 masm->CheckPreemption(/*is_backtrack=*/false); | 2917 masm->CheckPreemption(/*is_backtrack=*/false); |
| 3035 masm->LoadCurrentCharacter(max_lookahead, &cont, true); | 2918 masm->LoadCurrentCharacter(max_lookahead, &cont, true); |
| 3036 masm->CheckBitInTable(boolean_skip_table, &cont); | 2919 masm->CheckBitInTable(boolean_skip_table, &cont); |
| 3037 masm->AdvanceCurrentPosition(skip_distance); | 2920 masm->AdvanceCurrentPosition(skip_distance); |
| 3038 masm->GoTo(&again); | 2921 masm->GoTo(&again); |
| 3039 masm->BindBlock(&cont); | 2922 masm->BindBlock(&cont); |
| 3040 | 2923 |
| 3041 return; | 2924 return; |
| 3042 } | 2925 } |
| 3043 | 2926 |
| 3044 | |
| 3045 /* Code generation for choice nodes. | 2927 /* Code generation for choice nodes. |
| 3046 * | 2928 * |
| 3047 * We generate quick checks that do a mask and compare to eliminate a | 2929 * We generate quick checks that do a mask and compare to eliminate a |
| 3048 * choice. If the quick check succeeds then it jumps to the continuation to | 2930 * choice. If the quick check succeeds then it jumps to the continuation to |
| 3049 * do slow checks and check subsequent nodes. If it fails (the common case) | 2931 * do slow checks and check subsequent nodes. If it fails (the common case) |
| 3050 * it falls through to the next choice. | 2932 * it falls through to the next choice. |
| 3051 * | 2933 * |
| 3052 * Here is the desired flow graph. Nodes directly below each other imply | 2934 * Here is the desired flow graph. Nodes directly below each other imply |
| 3053 * fallthrough. Alternatives 1 and 2 have quick checks. Alternative | 2935 * fallthrough. Alternatives 1 and 2 have quick checks. Alternative |
| 3054 * 3 doesn't have a quick check so we have to call the slow check. | 2936 * 3 doesn't have a quick check so we have to call the slow check. |
| (...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3114 * S| / | 2996 * S| / |
| 3115 * V F / | 2997 * V F / |
| 3116 * S2--/ | 2998 * S2--/ |
| 3117 */ | 2999 */ |
| 3118 | 3000 |
| 3119 GreedyLoopState::GreedyLoopState(bool not_at_start) { | 3001 GreedyLoopState::GreedyLoopState(bool not_at_start) { |
| 3120 counter_backtrack_trace_.set_backtrack(&label_); | 3002 counter_backtrack_trace_.set_backtrack(&label_); |
| 3121 if (not_at_start) counter_backtrack_trace_.set_at_start(false); | 3003 if (not_at_start) counter_backtrack_trace_.set_at_start(false); |
| 3122 } | 3004 } |
| 3123 | 3005 |
| 3124 | |
| 3125 void ChoiceNode::AssertGuardsMentionRegisters(Trace* trace) { | 3006 void ChoiceNode::AssertGuardsMentionRegisters(Trace* trace) { |
| 3126 #ifdef DEBUG | 3007 #ifdef DEBUG |
| 3127 intptr_t choice_count = alternatives_->length(); | 3008 intptr_t choice_count = alternatives_->length(); |
| 3128 for (intptr_t i = 0; i < choice_count - 1; i++) { | 3009 for (intptr_t i = 0; i < choice_count - 1; i++) { |
| 3129 GuardedAlternative alternative = alternatives_->At(i); | 3010 GuardedAlternative alternative = alternatives_->At(i); |
| 3130 ZoneGrowableArray<Guard*>* guards = alternative.guards(); | 3011 ZoneGrowableArray<Guard*>* guards = alternative.guards(); |
| 3131 intptr_t guard_count = (guards == NULL) ? 0 : guards->length(); | 3012 intptr_t guard_count = (guards == NULL) ? 0 : guards->length(); |
| 3132 for (intptr_t j = 0; j < guard_count; j++) { | 3013 for (intptr_t j = 0; j < guard_count; j++) { |
| 3133 ASSERT(!trace->mentions_reg(guards->At(j)->reg())); | 3014 ASSERT(!trace->mentions_reg(guards->At(j)->reg())); |
| 3134 } | 3015 } |
| 3135 } | 3016 } |
| 3136 #endif | 3017 #endif |
| 3137 } | 3018 } |
| 3138 | 3019 |
| 3139 | |
| 3140 void ChoiceNode::SetUpPreLoad(RegExpCompiler* compiler, | 3020 void ChoiceNode::SetUpPreLoad(RegExpCompiler* compiler, |
| 3141 Trace* current_trace, | 3021 Trace* current_trace, |
| 3142 PreloadState* state) { | 3022 PreloadState* state) { |
| 3143 if (state->eats_at_least_ == PreloadState::kEatsAtLeastNotYetInitialized) { | 3023 if (state->eats_at_least_ == PreloadState::kEatsAtLeastNotYetInitialized) { |
| 3144 // Save some time by looking at most one machine word ahead. | 3024 // Save some time by looking at most one machine word ahead. |
| 3145 state->eats_at_least_ = | 3025 state->eats_at_least_ = |
| 3146 EatsAtLeast(compiler->one_byte() ? 4 : 2, kRecursionBudget, | 3026 EatsAtLeast(compiler->one_byte() ? 4 : 2, kRecursionBudget, |
| 3147 current_trace->at_start() == Trace::FALSE_VALUE); | 3027 current_trace->at_start() == Trace::FALSE_VALUE); |
| 3148 } | 3028 } |
| 3149 state->preload_characters_ = | 3029 state->preload_characters_ = |
| 3150 CalculatePreloadCharacters(compiler, state->eats_at_least_); | 3030 CalculatePreloadCharacters(compiler, state->eats_at_least_); |
| 3151 | 3031 |
| 3152 state->preload_is_current_ = | 3032 state->preload_is_current_ = |
| 3153 (current_trace->characters_preloaded() == state->preload_characters_); | 3033 (current_trace->characters_preloaded() == state->preload_characters_); |
| 3154 state->preload_has_checked_bounds_ = state->preload_is_current_; | 3034 state->preload_has_checked_bounds_ = state->preload_is_current_; |
| 3155 } | 3035 } |
| 3156 | 3036 |
| 3157 | |
| 3158 void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) { | 3037 void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) { |
| 3159 intptr_t choice_count = alternatives_->length(); | 3038 intptr_t choice_count = alternatives_->length(); |
| 3160 | 3039 |
| 3161 AssertGuardsMentionRegisters(trace); | 3040 AssertGuardsMentionRegisters(trace); |
| 3162 | 3041 |
| 3163 LimitResult limit_result = LimitVersions(compiler, trace); | 3042 LimitResult limit_result = LimitVersions(compiler, trace); |
| 3164 if (limit_result == DONE) return; | 3043 if (limit_result == DONE) return; |
| 3165 ASSERT(limit_result == CONTINUE); | 3044 ASSERT(limit_result == CONTINUE); |
| 3166 | 3045 |
| 3167 // For loop nodes we already flushed (see LoopChoiceNode::Emit), but for | 3046 // For loop nodes we already flushed (see LoopChoiceNode::Emit), but for |
| (...skipping 85 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3253 | 3132 |
| 3254 macro_assembler->BindBlock(greedy_loop_state->label()); | 3133 macro_assembler->BindBlock(greedy_loop_state->label()); |
| 3255 // If we have unwound to the bottom then backtrack. | 3134 // If we have unwound to the bottom then backtrack. |
| 3256 macro_assembler->CheckGreedyLoop(trace->backtrack()); | 3135 macro_assembler->CheckGreedyLoop(trace->backtrack()); |
| 3257 // Otherwise try the second priority at an earlier position. | 3136 // Otherwise try the second priority at an earlier position. |
| 3258 macro_assembler->AdvanceCurrentPosition(-text_length); | 3137 macro_assembler->AdvanceCurrentPosition(-text_length); |
| 3259 macro_assembler->GoTo(&second_choice); | 3138 macro_assembler->GoTo(&second_choice); |
| 3260 return new_trace; | 3139 return new_trace; |
| 3261 } | 3140 } |
| 3262 | 3141 |
| 3263 | |
| 3264 intptr_t ChoiceNode::EmitOptimizedUnanchoredSearch(RegExpCompiler* compiler, | 3142 intptr_t ChoiceNode::EmitOptimizedUnanchoredSearch(RegExpCompiler* compiler, |
| 3265 Trace* trace) { | 3143 Trace* trace) { |
| 3266 intptr_t eats_at_least = PreloadState::kEatsAtLeastNotYetInitialized; | 3144 intptr_t eats_at_least = PreloadState::kEatsAtLeastNotYetInitialized; |
| 3267 if (alternatives_->length() != 2) return eats_at_least; | 3145 if (alternatives_->length() != 2) return eats_at_least; |
| 3268 | 3146 |
| 3269 GuardedAlternative alt1 = alternatives_->At(1); | 3147 GuardedAlternative alt1 = alternatives_->At(1); |
| 3270 if (alt1.guards() != NULL && alt1.guards()->length() != 0) { | 3148 if (alt1.guards() != NULL && alt1.guards()->length() != 0) { |
| 3271 return eats_at_least; | 3149 return eats_at_least; |
| 3272 } | 3150 } |
| 3273 RegExpNode* eats_anything_node = alt1.node(); | 3151 RegExpNode* eats_anything_node = alt1.node(); |
| (...skipping 27 matching lines...) Expand all Loading... |
| 3301 GuardedAlternative alt0 = alternatives_->At(0); | 3179 GuardedAlternative alt0 = alternatives_->At(0); |
| 3302 alt0.node()->FillInBMInfo(0, kRecursionBudget, bm, false); | 3180 alt0.node()->FillInBMInfo(0, kRecursionBudget, bm, false); |
| 3303 } | 3181 } |
| 3304 } | 3182 } |
| 3305 if (bm != NULL) { | 3183 if (bm != NULL) { |
| 3306 bm->EmitSkipInstructions(macro_assembler); | 3184 bm->EmitSkipInstructions(macro_assembler); |
| 3307 } | 3185 } |
| 3308 return eats_at_least; | 3186 return eats_at_least; |
| 3309 } | 3187 } |
| 3310 | 3188 |
| 3311 | |
| 3312 void ChoiceNode::EmitChoices(RegExpCompiler* compiler, | 3189 void ChoiceNode::EmitChoices(RegExpCompiler* compiler, |
| 3313 AlternativeGenerationList* alt_gens, | 3190 AlternativeGenerationList* alt_gens, |
| 3314 intptr_t first_choice, | 3191 intptr_t first_choice, |
| 3315 Trace* trace, | 3192 Trace* trace, |
| 3316 PreloadState* preload) { | 3193 PreloadState* preload) { |
| 3317 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); | 3194 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); |
| 3318 SetUpPreLoad(compiler, trace, preload); | 3195 SetUpPreLoad(compiler, trace, preload); |
| 3319 | 3196 |
| 3320 // For now we just call all choices one after the other. The idea ultimately | 3197 // For now we just call all choices one after the other. The idea ultimately |
| 3321 // is to use the Dispatch table to try only the relevant ones. | 3198 // is to use the Dispatch table to try only the relevant ones. |
| (...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3386 for (intptr_t j = 0; j < guard_count; j++) { | 3263 for (intptr_t j = 0; j < guard_count; j++) { |
| 3387 GenerateGuard(macro_assembler, guards->At(j), &new_trace); | 3264 GenerateGuard(macro_assembler, guards->At(j), &new_trace); |
| 3388 } | 3265 } |
| 3389 alternative.node()->Emit(compiler, &new_trace); | 3266 alternative.node()->Emit(compiler, &new_trace); |
| 3390 preload->preload_is_current_ = false; | 3267 preload->preload_is_current_ = false; |
| 3391 } | 3268 } |
| 3392 macro_assembler->BindBlock(&alt_gen->after); | 3269 macro_assembler->BindBlock(&alt_gen->after); |
| 3393 } | 3270 } |
| 3394 } | 3271 } |
| 3395 | 3272 |
| 3396 | |
| 3397 void ChoiceNode::EmitOutOfLineContinuation(RegExpCompiler* compiler, | 3273 void ChoiceNode::EmitOutOfLineContinuation(RegExpCompiler* compiler, |
| 3398 Trace* trace, | 3274 Trace* trace, |
| 3399 GuardedAlternative alternative, | 3275 GuardedAlternative alternative, |
| 3400 AlternativeGeneration* alt_gen, | 3276 AlternativeGeneration* alt_gen, |
| 3401 intptr_t preload_characters, | 3277 intptr_t preload_characters, |
| 3402 bool next_expects_preload) { | 3278 bool next_expects_preload) { |
| 3403 if (!alt_gen->possible_success.IsLinked()) return; | 3279 if (!alt_gen->possible_success.IsLinked()) return; |
| 3404 | 3280 |
| 3405 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); | 3281 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); |
| 3406 macro_assembler->BindBlock(&alt_gen->possible_success); | 3282 macro_assembler->BindBlock(&alt_gen->possible_success); |
| (...skipping 19 matching lines...) Expand all Loading... |
| 3426 macro_assembler->GoTo(&(alt_gen->after)); | 3302 macro_assembler->GoTo(&(alt_gen->after)); |
| 3427 } else { | 3303 } else { |
| 3428 out_of_line_trace.set_backtrack(&(alt_gen->after)); | 3304 out_of_line_trace.set_backtrack(&(alt_gen->after)); |
| 3429 for (intptr_t j = 0; j < guard_count; j++) { | 3305 for (intptr_t j = 0; j < guard_count; j++) { |
| 3430 GenerateGuard(macro_assembler, guards->At(j), &out_of_line_trace); | 3306 GenerateGuard(macro_assembler, guards->At(j), &out_of_line_trace); |
| 3431 } | 3307 } |
| 3432 alternative.node()->Emit(compiler, &out_of_line_trace); | 3308 alternative.node()->Emit(compiler, &out_of_line_trace); |
| 3433 } | 3309 } |
| 3434 } | 3310 } |
| 3435 | 3311 |
| 3436 | |
| 3437 void ActionNode::Emit(RegExpCompiler* compiler, Trace* trace) { | 3312 void ActionNode::Emit(RegExpCompiler* compiler, Trace* trace) { |
| 3438 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 3313 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
| 3439 LimitResult limit_result = LimitVersions(compiler, trace); | 3314 LimitResult limit_result = LimitVersions(compiler, trace); |
| 3440 if (limit_result == DONE) return; | 3315 if (limit_result == DONE) return; |
| 3441 ASSERT(limit_result == CONTINUE); | 3316 ASSERT(limit_result == CONTINUE); |
| 3442 | 3317 |
| 3443 RecursionCheck rc(compiler); | 3318 RecursionCheck rc(compiler); |
| 3444 | 3319 |
| 3445 switch (action_type_) { | 3320 switch (action_type_) { |
| 3446 case STORE_POSITION: { | 3321 case STORE_POSITION: { |
| (...skipping 100 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3547 | 3422 |
| 3548 ASSERT(trace->backtrack() == NULL); | 3423 ASSERT(trace->backtrack() == NULL); |
| 3549 assembler->Backtrack(); | 3424 assembler->Backtrack(); |
| 3550 return; | 3425 return; |
| 3551 } | 3426 } |
| 3552 default: | 3427 default: |
| 3553 UNREACHABLE(); | 3428 UNREACHABLE(); |
| 3554 } | 3429 } |
| 3555 } | 3430 } |
| 3556 | 3431 |
| 3557 | |
| 3558 void BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) { | 3432 void BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) { |
| 3559 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 3433 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
| 3560 if (!trace->is_trivial()) { | 3434 if (!trace->is_trivial()) { |
| 3561 trace->Flush(compiler, this); | 3435 trace->Flush(compiler, this); |
| 3562 return; | 3436 return; |
| 3563 } | 3437 } |
| 3564 | 3438 |
| 3565 LimitResult limit_result = LimitVersions(compiler, trace); | 3439 LimitResult limit_result = LimitVersions(compiler, trace); |
| 3566 if (limit_result == DONE) return; | 3440 if (limit_result == DONE) return; |
| 3567 ASSERT(limit_result == CONTINUE); | 3441 ASSERT(limit_result == CONTINUE); |
| 3568 | 3442 |
| 3569 RecursionCheck rc(compiler); | 3443 RecursionCheck rc(compiler); |
| 3570 | 3444 |
| 3571 ASSERT(start_reg_ + 1 == end_reg_); | 3445 ASSERT(start_reg_ + 1 == end_reg_); |
| 3572 if (compiler->ignore_case()) { | 3446 if (compiler->ignore_case()) { |
| 3573 assembler->CheckNotBackReferenceIgnoreCase(start_reg_, trace->backtrack()); | 3447 assembler->CheckNotBackReferenceIgnoreCase(start_reg_, trace->backtrack()); |
| 3574 } else { | 3448 } else { |
| 3575 assembler->CheckNotBackReference(start_reg_, trace->backtrack()); | 3449 assembler->CheckNotBackReference(start_reg_, trace->backtrack()); |
| 3576 } | 3450 } |
| 3577 on_success()->Emit(compiler, trace); | 3451 on_success()->Emit(compiler, trace); |
| 3578 } | 3452 } |
| 3579 | 3453 |
| 3580 | |
| 3581 // ------------------------------------------------------------------- | 3454 // ------------------------------------------------------------------- |
| 3582 // Dot/dotty output | 3455 // Dot/dotty output |
| 3583 | 3456 |
| 3584 | |
| 3585 #ifdef DEBUG | 3457 #ifdef DEBUG |
| 3586 | 3458 |
| 3587 | |
| 3588 class DotPrinter : public NodeVisitor { | 3459 class DotPrinter : public NodeVisitor { |
| 3589 public: | 3460 public: |
| 3590 explicit DotPrinter(bool ignore_case) {} | 3461 explicit DotPrinter(bool ignore_case) {} |
| 3591 void PrintNode(const char* label, RegExpNode* node); | 3462 void PrintNode(const char* label, RegExpNode* node); |
| 3592 void Visit(RegExpNode* node); | 3463 void Visit(RegExpNode* node); |
| 3593 void PrintAttributes(RegExpNode* from); | 3464 void PrintAttributes(RegExpNode* from); |
| 3594 void PrintOnFailure(RegExpNode* from, RegExpNode* to); | 3465 void PrintOnFailure(RegExpNode* from, RegExpNode* to); |
| 3595 #define DECLARE_VISIT(Type) virtual void Visit##Type(Type##Node* that); | 3466 #define DECLARE_VISIT(Type) virtual void Visit##Type(Type##Node* that); |
| 3596 FOR_EACH_NODE_TYPE(DECLARE_VISIT) | 3467 FOR_EACH_NODE_TYPE(DECLARE_VISIT) |
| 3597 #undef DECLARE_VISIT | 3468 #undef DECLARE_VISIT |
| 3598 }; | 3469 }; |
| 3599 | 3470 |
| 3600 | |
| 3601 void DotPrinter::PrintNode(const char* label, RegExpNode* node) { | 3471 void DotPrinter::PrintNode(const char* label, RegExpNode* node) { |
| 3602 OS::Print("digraph G {\n graph [label=\""); | 3472 OS::Print("digraph G {\n graph [label=\""); |
| 3603 for (intptr_t i = 0; label[i]; i++) { | 3473 for (intptr_t i = 0; label[i]; i++) { |
| 3604 switch (label[i]) { | 3474 switch (label[i]) { |
| 3605 case '\\': | 3475 case '\\': |
| 3606 OS::Print("\\\\"); | 3476 OS::Print("\\\\"); |
| 3607 break; | 3477 break; |
| 3608 case '"': | 3478 case '"': |
| 3609 OS::Print("\""); | 3479 OS::Print("\""); |
| 3610 break; | 3480 break; |
| 3611 default: | 3481 default: |
| 3612 OS::Print("%c", label[i]); | 3482 OS::Print("%c", label[i]); |
| 3613 break; | 3483 break; |
| 3614 } | 3484 } |
| 3615 } | 3485 } |
| 3616 OS::Print("\"];\n"); | 3486 OS::Print("\"];\n"); |
| 3617 Visit(node); | 3487 Visit(node); |
| 3618 OS::Print("}\n"); | 3488 OS::Print("}\n"); |
| 3619 } | 3489 } |
| 3620 | 3490 |
| 3621 | |
| 3622 void DotPrinter::Visit(RegExpNode* node) { | 3491 void DotPrinter::Visit(RegExpNode* node) { |
| 3623 if (node->info()->visited) return; | 3492 if (node->info()->visited) return; |
| 3624 node->info()->visited = true; | 3493 node->info()->visited = true; |
| 3625 node->Accept(this); | 3494 node->Accept(this); |
| 3626 } | 3495 } |
| 3627 | 3496 |
| 3628 | |
| 3629 void DotPrinter::PrintOnFailure(RegExpNode* from, RegExpNode* on_failure) { | 3497 void DotPrinter::PrintOnFailure(RegExpNode* from, RegExpNode* on_failure) { |
| 3630 OS::Print(" n%p -> n%p [style=dotted];\n", from, on_failure); | 3498 OS::Print(" n%p -> n%p [style=dotted];\n", from, on_failure); |
| 3631 Visit(on_failure); | 3499 Visit(on_failure); |
| 3632 } | 3500 } |
| 3633 | 3501 |
| 3634 | |
| 3635 class AttributePrinter : public ValueObject { | 3502 class AttributePrinter : public ValueObject { |
| 3636 public: | 3503 public: |
| 3637 AttributePrinter() : first_(true) {} | 3504 AttributePrinter() : first_(true) {} |
| 3638 void PrintSeparator() { | 3505 void PrintSeparator() { |
| 3639 if (first_) { | 3506 if (first_) { |
| 3640 first_ = false; | 3507 first_ = false; |
| 3641 } else { | 3508 } else { |
| 3642 OS::Print("|"); | 3509 OS::Print("|"); |
| 3643 } | 3510 } |
| 3644 } | 3511 } |
| 3645 void PrintBit(const char* name, bool value) { | 3512 void PrintBit(const char* name, bool value) { |
| 3646 if (!value) return; | 3513 if (!value) return; |
| 3647 PrintSeparator(); | 3514 PrintSeparator(); |
| 3648 OS::Print("{%s}", name); | 3515 OS::Print("{%s}", name); |
| 3649 } | 3516 } |
| 3650 void PrintPositive(const char* name, intptr_t value) { | 3517 void PrintPositive(const char* name, intptr_t value) { |
| 3651 if (value < 0) return; | 3518 if (value < 0) return; |
| 3652 PrintSeparator(); | 3519 PrintSeparator(); |
| 3653 OS::Print("{%s|%" Pd "}", name, value); | 3520 OS::Print("{%s|%" Pd "}", name, value); |
| 3654 } | 3521 } |
| 3655 | 3522 |
| 3656 private: | 3523 private: |
| 3657 bool first_; | 3524 bool first_; |
| 3658 }; | 3525 }; |
| 3659 | 3526 |
| 3660 | |
| 3661 void DotPrinter::PrintAttributes(RegExpNode* that) { | 3527 void DotPrinter::PrintAttributes(RegExpNode* that) { |
| 3662 OS::Print( | 3528 OS::Print( |
| 3663 " a%p [shape=Mrecord, color=grey, fontcolor=grey, " | 3529 " a%p [shape=Mrecord, color=grey, fontcolor=grey, " |
| 3664 "margin=0.1, fontsize=10, label=\"{", | 3530 "margin=0.1, fontsize=10, label=\"{", |
| 3665 that); | 3531 that); |
| 3666 AttributePrinter printer; | 3532 AttributePrinter printer; |
| 3667 NodeInfo* info = that->info(); | 3533 NodeInfo* info = that->info(); |
| 3668 printer.PrintBit("NI", info->follows_newline_interest); | 3534 printer.PrintBit("NI", info->follows_newline_interest); |
| 3669 printer.PrintBit("WI", info->follows_word_interest); | 3535 printer.PrintBit("WI", info->follows_word_interest); |
| 3670 printer.PrintBit("SI", info->follows_start_interest); | 3536 printer.PrintBit("SI", info->follows_start_interest); |
| 3671 BlockLabel* label = that->label(); | 3537 BlockLabel* label = that->label(); |
| 3672 if (label->IsBound()) printer.PrintPositive("@", label->Position()); | 3538 if (label->IsBound()) printer.PrintPositive("@", label->Position()); |
| 3673 OS::Print( | 3539 OS::Print( |
| 3674 "}\"];\n" | 3540 "}\"];\n" |
| 3675 " a%p -> n%p [style=dashed, color=grey, arrowhead=none];\n", | 3541 " a%p -> n%p [style=dashed, color=grey, arrowhead=none];\n", |
| 3676 that, that); | 3542 that, that); |
| 3677 } | 3543 } |
| 3678 | 3544 |
| 3679 | |
| 3680 void DotPrinter::VisitChoice(ChoiceNode* that) { | 3545 void DotPrinter::VisitChoice(ChoiceNode* that) { |
| 3681 OS::Print(" n%p [shape=Mrecord, label=\"?\"];\n", that); | 3546 OS::Print(" n%p [shape=Mrecord, label=\"?\"];\n", that); |
| 3682 for (intptr_t i = 0; i < that->alternatives()->length(); i++) { | 3547 for (intptr_t i = 0; i < that->alternatives()->length(); i++) { |
| 3683 GuardedAlternative alt = that->alternatives()->At(i); | 3548 GuardedAlternative alt = that->alternatives()->At(i); |
| 3684 OS::Print(" n%p -> n%p", that, alt.node()); | 3549 OS::Print(" n%p -> n%p", that, alt.node()); |
| 3685 } | 3550 } |
| 3686 for (intptr_t i = 0; i < that->alternatives()->length(); i++) { | 3551 for (intptr_t i = 0; i < that->alternatives()->length(); i++) { |
| 3687 GuardedAlternative alt = that->alternatives()->At(i); | 3552 GuardedAlternative alt = that->alternatives()->At(i); |
| 3688 alt.node()->Accept(this); | 3553 alt.node()->Accept(this); |
| 3689 } | 3554 } |
| 3690 } | 3555 } |
| 3691 | 3556 |
| 3692 | |
| 3693 void DotPrinter::VisitText(TextNode* that) { | 3557 void DotPrinter::VisitText(TextNode* that) { |
| 3694 OS::Print(" n%p [label=\"", that); | 3558 OS::Print(" n%p [label=\"", that); |
| 3695 for (intptr_t i = 0; i < that->elements()->length(); i++) { | 3559 for (intptr_t i = 0; i < that->elements()->length(); i++) { |
| 3696 if (i > 0) OS::Print(" "); | 3560 if (i > 0) OS::Print(" "); |
| 3697 TextElement elm = that->elements()->At(i); | 3561 TextElement elm = that->elements()->At(i); |
| 3698 switch (elm.text_type()) { | 3562 switch (elm.text_type()) { |
| 3699 case TextElement::ATOM: { | 3563 case TextElement::ATOM: { |
| 3700 ZoneGrowableArray<uint16_t>* data = elm.atom()->data(); | 3564 ZoneGrowableArray<uint16_t>* data = elm.atom()->data(); |
| 3701 for (intptr_t i = 0; i < data->length(); i++) { | 3565 for (intptr_t i = 0; i < data->length(); i++) { |
| 3702 OS::Print("%c", static_cast<char>(data->At(i))); | 3566 OS::Print("%c", static_cast<char>(data->At(i))); |
| (...skipping 16 matching lines...) Expand all Loading... |
| 3719 default: | 3583 default: |
| 3720 UNREACHABLE(); | 3584 UNREACHABLE(); |
| 3721 } | 3585 } |
| 3722 } | 3586 } |
| 3723 OS::Print("\", shape=box, peripheries=2];\n"); | 3587 OS::Print("\", shape=box, peripheries=2];\n"); |
| 3724 PrintAttributes(that); | 3588 PrintAttributes(that); |
| 3725 OS::Print(" n%p -> n%p;\n", that, that->on_success()); | 3589 OS::Print(" n%p -> n%p;\n", that, that->on_success()); |
| 3726 Visit(that->on_success()); | 3590 Visit(that->on_success()); |
| 3727 } | 3591 } |
| 3728 | 3592 |
| 3729 | |
| 3730 void DotPrinter::VisitBackReference(BackReferenceNode* that) { | 3593 void DotPrinter::VisitBackReference(BackReferenceNode* that) { |
| 3731 OS::Print(" n%p [label=\"$%" Pd "..$%" Pd "\", shape=doubleoctagon];\n", | 3594 OS::Print(" n%p [label=\"$%" Pd "..$%" Pd "\", shape=doubleoctagon];\n", |
| 3732 that, that->start_register(), that->end_register()); | 3595 that, that->start_register(), that->end_register()); |
| 3733 PrintAttributes(that); | 3596 PrintAttributes(that); |
| 3734 OS::Print(" n%p -> n%p;\n", that, that->on_success()); | 3597 OS::Print(" n%p -> n%p;\n", that, that->on_success()); |
| 3735 Visit(that->on_success()); | 3598 Visit(that->on_success()); |
| 3736 } | 3599 } |
| 3737 | 3600 |
| 3738 | |
| 3739 void DotPrinter::VisitEnd(EndNode* that) { | 3601 void DotPrinter::VisitEnd(EndNode* that) { |
| 3740 OS::Print(" n%p [style=bold, shape=point];\n", that); | 3602 OS::Print(" n%p [style=bold, shape=point];\n", that); |
| 3741 PrintAttributes(that); | 3603 PrintAttributes(that); |
| 3742 } | 3604 } |
| 3743 | 3605 |
| 3744 | |
| 3745 void DotPrinter::VisitAssertion(AssertionNode* that) { | 3606 void DotPrinter::VisitAssertion(AssertionNode* that) { |
| 3746 OS::Print(" n%p [", that); | 3607 OS::Print(" n%p [", that); |
| 3747 switch (that->assertion_type()) { | 3608 switch (that->assertion_type()) { |
| 3748 case AssertionNode::AT_END: | 3609 case AssertionNode::AT_END: |
| 3749 OS::Print("label=\"$\", shape=septagon"); | 3610 OS::Print("label=\"$\", shape=septagon"); |
| 3750 break; | 3611 break; |
| 3751 case AssertionNode::AT_START: | 3612 case AssertionNode::AT_START: |
| 3752 OS::Print("label=\"^\", shape=septagon"); | 3613 OS::Print("label=\"^\", shape=septagon"); |
| 3753 break; | 3614 break; |
| 3754 case AssertionNode::AT_BOUNDARY: | 3615 case AssertionNode::AT_BOUNDARY: |
| 3755 OS::Print("label=\"\\b\", shape=septagon"); | 3616 OS::Print("label=\"\\b\", shape=septagon"); |
| 3756 break; | 3617 break; |
| 3757 case AssertionNode::AT_NON_BOUNDARY: | 3618 case AssertionNode::AT_NON_BOUNDARY: |
| 3758 OS::Print("label=\"\\B\", shape=septagon"); | 3619 OS::Print("label=\"\\B\", shape=septagon"); |
| 3759 break; | 3620 break; |
| 3760 case AssertionNode::AFTER_NEWLINE: | 3621 case AssertionNode::AFTER_NEWLINE: |
| 3761 OS::Print("label=\"(?<=\\n)\", shape=septagon"); | 3622 OS::Print("label=\"(?<=\\n)\", shape=septagon"); |
| 3762 break; | 3623 break; |
| 3763 } | 3624 } |
| 3764 OS::Print("];\n"); | 3625 OS::Print("];\n"); |
| 3765 PrintAttributes(that); | 3626 PrintAttributes(that); |
| 3766 RegExpNode* successor = that->on_success(); | 3627 RegExpNode* successor = that->on_success(); |
| 3767 OS::Print(" n%p -> n%p;\n", that, successor); | 3628 OS::Print(" n%p -> n%p;\n", that, successor); |
| 3768 Visit(successor); | 3629 Visit(successor); |
| 3769 } | 3630 } |
| 3770 | 3631 |
| 3771 | |
| 3772 void DotPrinter::VisitAction(ActionNode* that) { | 3632 void DotPrinter::VisitAction(ActionNode* that) { |
| 3773 OS::Print(" n%p [", that); | 3633 OS::Print(" n%p [", that); |
| 3774 switch (that->action_type_) { | 3634 switch (that->action_type_) { |
| 3775 case ActionNode::SET_REGISTER: | 3635 case ActionNode::SET_REGISTER: |
| 3776 OS::Print("label=\"$%" Pd ":=%" Pd "\", shape=octagon", | 3636 OS::Print("label=\"$%" Pd ":=%" Pd "\", shape=octagon", |
| 3777 that->data_.u_store_register.reg, | 3637 that->data_.u_store_register.reg, |
| 3778 that->data_.u_store_register.value); | 3638 that->data_.u_store_register.value); |
| 3779 break; | 3639 break; |
| 3780 case ActionNode::INCREMENT_REGISTER: | 3640 case ActionNode::INCREMENT_REGISTER: |
| 3781 OS::Print("label=\"$%" Pd "++\", shape=octagon", | 3641 OS::Print("label=\"$%" Pd "++\", shape=octagon", |
| (...skipping 23 matching lines...) Expand all Loading... |
| 3805 break; | 3665 break; |
| 3806 } | 3666 } |
| 3807 } | 3667 } |
| 3808 OS::Print("];\n"); | 3668 OS::Print("];\n"); |
| 3809 PrintAttributes(that); | 3669 PrintAttributes(that); |
| 3810 RegExpNode* successor = that->on_success(); | 3670 RegExpNode* successor = that->on_success(); |
| 3811 OS::Print(" n%p -> n%p;\n", that, successor); | 3671 OS::Print(" n%p -> n%p;\n", that, successor); |
| 3812 Visit(successor); | 3672 Visit(successor); |
| 3813 } | 3673 } |
| 3814 | 3674 |
| 3815 | |
| 3816 void RegExpEngine::DotPrint(const char* label, | 3675 void RegExpEngine::DotPrint(const char* label, |
| 3817 RegExpNode* node, | 3676 RegExpNode* node, |
| 3818 bool ignore_case) { | 3677 bool ignore_case) { |
| 3819 DotPrinter printer(ignore_case); | 3678 DotPrinter printer(ignore_case); |
| 3820 printer.PrintNode(label, node); | 3679 printer.PrintNode(label, node); |
| 3821 } | 3680 } |
| 3822 | 3681 |
| 3823 | |
| 3824 #endif // DEBUG | 3682 #endif // DEBUG |
| 3825 | 3683 |
| 3826 | |
| 3827 // ------------------------------------------------------------------- | 3684 // ------------------------------------------------------------------- |
| 3828 // Tree to graph conversion | 3685 // Tree to graph conversion |
| 3829 | 3686 |
| 3830 // The zone in which we allocate graph nodes. | 3687 // The zone in which we allocate graph nodes. |
| 3831 #define OZ (on_success->zone()) | 3688 #define OZ (on_success->zone()) |
| 3832 | 3689 |
| 3833 RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler, | 3690 RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler, |
| 3834 RegExpNode* on_success) { | 3691 RegExpNode* on_success) { |
| 3835 ZoneGrowableArray<TextElement>* elms = | 3692 ZoneGrowableArray<TextElement>* elms = |
| 3836 new (OZ) ZoneGrowableArray<TextElement>(1); | 3693 new (OZ) ZoneGrowableArray<TextElement>(1); |
| 3837 elms->Add(TextElement::Atom(this)); | 3694 elms->Add(TextElement::Atom(this)); |
| 3838 return new (OZ) TextNode(elms, on_success); | 3695 return new (OZ) TextNode(elms, on_success); |
| 3839 } | 3696 } |
| 3840 | 3697 |
| 3841 | |
| 3842 RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler, | 3698 RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler, |
| 3843 RegExpNode* on_success) { | 3699 RegExpNode* on_success) { |
| 3844 ZoneGrowableArray<TextElement>* elms = | 3700 ZoneGrowableArray<TextElement>* elms = |
| 3845 new (OZ) ZoneGrowableArray<TextElement>(1); | 3701 new (OZ) ZoneGrowableArray<TextElement>(1); |
| 3846 for (intptr_t i = 0; i < elements()->length(); i++) { | 3702 for (intptr_t i = 0; i < elements()->length(); i++) { |
| 3847 elms->Add(elements()->At(i)); | 3703 elms->Add(elements()->At(i)); |
| 3848 } | 3704 } |
| 3849 return new (OZ) TextNode(elms, on_success); | 3705 return new (OZ) TextNode(elms, on_success); |
| 3850 } | 3706 } |
| 3851 | 3707 |
| 3852 | |
| 3853 static bool CompareInverseRanges(ZoneGrowableArray<CharacterRange>* ranges, | 3708 static bool CompareInverseRanges(ZoneGrowableArray<CharacterRange>* ranges, |
| 3854 const intptr_t* special_class, | 3709 const intptr_t* special_class, |
| 3855 intptr_t length) { | 3710 intptr_t length) { |
| 3856 length--; // Remove final 0x10000. | 3711 length--; // Remove final 0x10000. |
| 3857 ASSERT(special_class[length] == 0x10000); | 3712 ASSERT(special_class[length] == 0x10000); |
| 3858 ASSERT(ranges->length() != 0); | 3713 ASSERT(ranges->length() != 0); |
| 3859 ASSERT(length != 0); | 3714 ASSERT(length != 0); |
| 3860 ASSERT(special_class[0] != 0); | 3715 ASSERT(special_class[0] != 0); |
| 3861 if (ranges->length() != (length >> 1) + 1) { | 3716 if (ranges->length() != (length >> 1) + 1) { |
| 3862 return false; | 3717 return false; |
| (...skipping 10 matching lines...) Expand all Loading... |
| 3873 if (special_class[i + 1] != range.from()) { | 3728 if (special_class[i + 1] != range.from()) { |
| 3874 return false; | 3729 return false; |
| 3875 } | 3730 } |
| 3876 } | 3731 } |
| 3877 if (range.to() != 0xffff) { | 3732 if (range.to() != 0xffff) { |
| 3878 return false; | 3733 return false; |
| 3879 } | 3734 } |
| 3880 return true; | 3735 return true; |
| 3881 } | 3736 } |
| 3882 | 3737 |
| 3883 | |
| 3884 static bool CompareRanges(ZoneGrowableArray<CharacterRange>* ranges, | 3738 static bool CompareRanges(ZoneGrowableArray<CharacterRange>* ranges, |
| 3885 const intptr_t* special_class, | 3739 const intptr_t* special_class, |
| 3886 intptr_t length) { | 3740 intptr_t length) { |
| 3887 length--; // Remove final 0x10000. | 3741 length--; // Remove final 0x10000. |
| 3888 ASSERT(special_class[length] == 0x10000); | 3742 ASSERT(special_class[length] == 0x10000); |
| 3889 if (ranges->length() * 2 != length) { | 3743 if (ranges->length() * 2 != length) { |
| 3890 return false; | 3744 return false; |
| 3891 } | 3745 } |
| 3892 for (intptr_t i = 0; i < length; i += 2) { | 3746 for (intptr_t i = 0; i < length; i += 2) { |
| 3893 CharacterRange range = ranges->At(i >> 1); | 3747 CharacterRange range = ranges->At(i >> 1); |
| 3894 if (range.from() != special_class[i] || | 3748 if (range.from() != special_class[i] || |
| 3895 range.to() != special_class[i + 1] - 1) { | 3749 range.to() != special_class[i + 1] - 1) { |
| 3896 return false; | 3750 return false; |
| 3897 } | 3751 } |
| 3898 } | 3752 } |
| 3899 return true; | 3753 return true; |
| 3900 } | 3754 } |
| 3901 | 3755 |
| 3902 | |
| 3903 bool RegExpCharacterClass::is_standard() { | 3756 bool RegExpCharacterClass::is_standard() { |
| 3904 // TODO(lrn): Remove need for this function, by not throwing away information | 3757 // TODO(lrn): Remove need for this function, by not throwing away information |
| 3905 // along the way. | 3758 // along the way. |
| 3906 if (is_negated_) { | 3759 if (is_negated_) { |
| 3907 return false; | 3760 return false; |
| 3908 } | 3761 } |
| 3909 if (set_.is_standard()) { | 3762 if (set_.is_standard()) { |
| 3910 return true; | 3763 return true; |
| 3911 } | 3764 } |
| 3912 if (CompareRanges(set_.ranges(), kSpaceRanges, kSpaceRangeCount)) { | 3765 if (CompareRanges(set_.ranges(), kSpaceRanges, kSpaceRangeCount)) { |
| (...skipping 18 matching lines...) Expand all Loading... |
| 3931 set_.set_standard_set_type('w'); | 3784 set_.set_standard_set_type('w'); |
| 3932 return true; | 3785 return true; |
| 3933 } | 3786 } |
| 3934 if (CompareInverseRanges(set_.ranges(), kWordRanges, kWordRangeCount)) { | 3787 if (CompareInverseRanges(set_.ranges(), kWordRanges, kWordRangeCount)) { |
| 3935 set_.set_standard_set_type('W'); | 3788 set_.set_standard_set_type('W'); |
| 3936 return true; | 3789 return true; |
| 3937 } | 3790 } |
| 3938 return false; | 3791 return false; |
| 3939 } | 3792 } |
| 3940 | 3793 |
| 3941 | |
| 3942 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, | 3794 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, |
| 3943 RegExpNode* on_success) { | 3795 RegExpNode* on_success) { |
| 3944 return new (OZ) TextNode(this, on_success); | 3796 return new (OZ) TextNode(this, on_success); |
| 3945 } | 3797 } |
| 3946 | 3798 |
| 3947 | |
| 3948 RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler, | 3799 RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler, |
| 3949 RegExpNode* on_success) { | 3800 RegExpNode* on_success) { |
| 3950 ZoneGrowableArray<RegExpTree*>* alternatives = this->alternatives(); | 3801 ZoneGrowableArray<RegExpTree*>* alternatives = this->alternatives(); |
| 3951 intptr_t length = alternatives->length(); | 3802 intptr_t length = alternatives->length(); |
| 3952 ChoiceNode* result = new (OZ) ChoiceNode(length, OZ); | 3803 ChoiceNode* result = new (OZ) ChoiceNode(length, OZ); |
| 3953 for (intptr_t i = 0; i < length; i++) { | 3804 for (intptr_t i = 0; i < length; i++) { |
| 3954 GuardedAlternative alternative( | 3805 GuardedAlternative alternative( |
| 3955 alternatives->At(i)->ToNode(compiler, on_success)); | 3806 alternatives->At(i)->ToNode(compiler, on_success)); |
| 3956 result->AddAlternative(alternative); | 3807 result->AddAlternative(alternative); |
| 3957 } | 3808 } |
| 3958 return result; | 3809 return result; |
| 3959 } | 3810 } |
| 3960 | 3811 |
| 3961 | |
| 3962 RegExpNode* RegExpQuantifier::ToNode(RegExpCompiler* compiler, | 3812 RegExpNode* RegExpQuantifier::ToNode(RegExpCompiler* compiler, |
| 3963 RegExpNode* on_success) { | 3813 RegExpNode* on_success) { |
| 3964 return ToNode(min(), max(), is_greedy(), body(), compiler, on_success); | 3814 return ToNode(min(), max(), is_greedy(), body(), compiler, on_success); |
| 3965 } | 3815 } |
| 3966 | 3816 |
| 3967 | |
| 3968 // Scoped object to keep track of how much we unroll quantifier loops in the | 3817 // Scoped object to keep track of how much we unroll quantifier loops in the |
| 3969 // regexp graph generator. | 3818 // regexp graph generator. |
| 3970 class RegExpExpansionLimiter : public ValueObject { | 3819 class RegExpExpansionLimiter : public ValueObject { |
| 3971 public: | 3820 public: |
| 3972 static const intptr_t kMaxExpansionFactor = 6; | 3821 static const intptr_t kMaxExpansionFactor = 6; |
| 3973 RegExpExpansionLimiter(RegExpCompiler* compiler, intptr_t factor) | 3822 RegExpExpansionLimiter(RegExpCompiler* compiler, intptr_t factor) |
| 3974 : compiler_(compiler), | 3823 : compiler_(compiler), |
| 3975 saved_expansion_factor_(compiler->current_expansion_factor()), | 3824 saved_expansion_factor_(compiler->current_expansion_factor()), |
| 3976 ok_to_expand_(saved_expansion_factor_ <= kMaxExpansionFactor) { | 3825 ok_to_expand_(saved_expansion_factor_ <= kMaxExpansionFactor) { |
| 3977 ASSERT(factor > 0); | 3826 ASSERT(factor > 0); |
| (...skipping 17 matching lines...) Expand all Loading... |
| 3995 bool ok_to_expand() { return ok_to_expand_; } | 3844 bool ok_to_expand() { return ok_to_expand_; } |
| 3996 | 3845 |
| 3997 private: | 3846 private: |
| 3998 RegExpCompiler* compiler_; | 3847 RegExpCompiler* compiler_; |
| 3999 intptr_t saved_expansion_factor_; | 3848 intptr_t saved_expansion_factor_; |
| 4000 bool ok_to_expand_; | 3849 bool ok_to_expand_; |
| 4001 | 3850 |
| 4002 DISALLOW_IMPLICIT_CONSTRUCTORS(RegExpExpansionLimiter); | 3851 DISALLOW_IMPLICIT_CONSTRUCTORS(RegExpExpansionLimiter); |
| 4003 }; | 3852 }; |
| 4004 | 3853 |
| 4005 | |
| 4006 RegExpNode* RegExpQuantifier::ToNode(intptr_t min, | 3854 RegExpNode* RegExpQuantifier::ToNode(intptr_t min, |
| 4007 intptr_t max, | 3855 intptr_t max, |
| 4008 bool is_greedy, | 3856 bool is_greedy, |
| 4009 RegExpTree* body, | 3857 RegExpTree* body, |
| 4010 RegExpCompiler* compiler, | 3858 RegExpCompiler* compiler, |
| 4011 RegExpNode* on_success, | 3859 RegExpNode* on_success, |
| 4012 bool not_at_start) { | 3860 bool not_at_start) { |
| 4013 // x{f, t} becomes this: | 3861 // x{f, t} becomes this: |
| 4014 // | 3862 // |
| 4015 // (r++)<-. | 3863 // (r++)<-. |
| (...skipping 116 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4132 center->AddContinueAlternative(rest_alt); | 3980 center->AddContinueAlternative(rest_alt); |
| 4133 center->AddLoopAlternative(body_alt); | 3981 center->AddLoopAlternative(body_alt); |
| 4134 } | 3982 } |
| 4135 if (needs_counter) { | 3983 if (needs_counter) { |
| 4136 return ActionNode::SetRegister(reg_ctr, 0, center); | 3984 return ActionNode::SetRegister(reg_ctr, 0, center); |
| 4137 } else { | 3985 } else { |
| 4138 return center; | 3986 return center; |
| 4139 } | 3987 } |
| 4140 } | 3988 } |
| 4141 | 3989 |
| 4142 | |
| 4143 RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler, | 3990 RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler, |
| 4144 RegExpNode* on_success) { | 3991 RegExpNode* on_success) { |
| 4145 switch (assertion_type()) { | 3992 switch (assertion_type()) { |
| 4146 case START_OF_LINE: | 3993 case START_OF_LINE: |
| 4147 return AssertionNode::AfterNewline(on_success); | 3994 return AssertionNode::AfterNewline(on_success); |
| 4148 case START_OF_INPUT: | 3995 case START_OF_INPUT: |
| 4149 return AssertionNode::AtStart(on_success); | 3996 return AssertionNode::AtStart(on_success); |
| 4150 case BOUNDARY: | 3997 case BOUNDARY: |
| 4151 return AssertionNode::AtBoundary(on_success); | 3998 return AssertionNode::AtBoundary(on_success); |
| 4152 case NON_BOUNDARY: | 3999 case NON_BOUNDARY: |
| (...skipping 28 matching lines...) Expand all Loading... |
| 4181 GuardedAlternative end_alternative(AssertionNode::AtEnd(on_success)); | 4028 GuardedAlternative end_alternative(AssertionNode::AtEnd(on_success)); |
| 4182 result->AddAlternative(end_alternative); | 4029 result->AddAlternative(end_alternative); |
| 4183 return result; | 4030 return result; |
| 4184 } | 4031 } |
| 4185 default: | 4032 default: |
| 4186 UNREACHABLE(); | 4033 UNREACHABLE(); |
| 4187 } | 4034 } |
| 4188 return on_success; | 4035 return on_success; |
| 4189 } | 4036 } |
| 4190 | 4037 |
| 4191 | |
| 4192 RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler, | 4038 RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler, |
| 4193 RegExpNode* on_success) { | 4039 RegExpNode* on_success) { |
| 4194 return new (OZ) | 4040 return new (OZ) |
| 4195 BackReferenceNode(RegExpCapture::StartRegister(index()), | 4041 BackReferenceNode(RegExpCapture::StartRegister(index()), |
| 4196 RegExpCapture::EndRegister(index()), on_success); | 4042 RegExpCapture::EndRegister(index()), on_success); |
| 4197 } | 4043 } |
| 4198 | 4044 |
| 4199 | |
| 4200 RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler, | 4045 RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler, |
| 4201 RegExpNode* on_success) { | 4046 RegExpNode* on_success) { |
| 4202 return on_success; | 4047 return on_success; |
| 4203 } | 4048 } |
| 4204 | 4049 |
| 4205 | |
| 4206 RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler, | 4050 RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler, |
| 4207 RegExpNode* on_success) { | 4051 RegExpNode* on_success) { |
| 4208 intptr_t stack_pointer_register = compiler->AllocateRegister(); | 4052 intptr_t stack_pointer_register = compiler->AllocateRegister(); |
| 4209 intptr_t position_register = compiler->AllocateRegister(); | 4053 intptr_t position_register = compiler->AllocateRegister(); |
| 4210 | 4054 |
| 4211 const intptr_t registers_per_capture = 2; | 4055 const intptr_t registers_per_capture = 2; |
| 4212 const intptr_t register_of_first_capture = 2; | 4056 const intptr_t register_of_first_capture = 2; |
| 4213 intptr_t register_count = capture_count_ * registers_per_capture; | 4057 intptr_t register_count = capture_count_ * registers_per_capture; |
| 4214 intptr_t register_start = | 4058 intptr_t register_start = |
| 4215 register_of_first_capture + capture_from_ * registers_per_capture; | 4059 register_of_first_capture + capture_from_ * registers_per_capture; |
| (...skipping 23 matching lines...) Expand all Loading... |
| 4239 body()->ToNode(compiler, success = new (OZ) NegativeSubmatchSuccess( | 4083 body()->ToNode(compiler, success = new (OZ) NegativeSubmatchSuccess( |
| 4240 stack_pointer_register, position_register, | 4084 stack_pointer_register, position_register, |
| 4241 register_count, register_start, OZ))); | 4085 register_count, register_start, OZ))); |
| 4242 ChoiceNode* choice_node = new (OZ) NegativeLookaheadChoiceNode( | 4086 ChoiceNode* choice_node = new (OZ) NegativeLookaheadChoiceNode( |
| 4243 body_alt, GuardedAlternative(on_success), OZ); | 4087 body_alt, GuardedAlternative(on_success), OZ); |
| 4244 return ActionNode::BeginSubmatch(stack_pointer_register, position_register, | 4088 return ActionNode::BeginSubmatch(stack_pointer_register, position_register, |
| 4245 choice_node); | 4089 choice_node); |
| 4246 } | 4090 } |
| 4247 } | 4091 } |
| 4248 | 4092 |
| 4249 | |
| 4250 RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler, | 4093 RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler, |
| 4251 RegExpNode* on_success) { | 4094 RegExpNode* on_success) { |
| 4252 return ToNode(body(), index(), compiler, on_success); | 4095 return ToNode(body(), index(), compiler, on_success); |
| 4253 } | 4096 } |
| 4254 | 4097 |
| 4255 | |
| 4256 RegExpNode* RegExpCapture::ToNode(RegExpTree* body, | 4098 RegExpNode* RegExpCapture::ToNode(RegExpTree* body, |
| 4257 intptr_t index, | 4099 intptr_t index, |
| 4258 RegExpCompiler* compiler, | 4100 RegExpCompiler* compiler, |
| 4259 RegExpNode* on_success) { | 4101 RegExpNode* on_success) { |
| 4260 intptr_t start_reg = RegExpCapture::StartRegister(index); | 4102 intptr_t start_reg = RegExpCapture::StartRegister(index); |
| 4261 intptr_t end_reg = RegExpCapture::EndRegister(index); | 4103 intptr_t end_reg = RegExpCapture::EndRegister(index); |
| 4262 RegExpNode* store_end = ActionNode::StorePosition(end_reg, true, on_success); | 4104 RegExpNode* store_end = ActionNode::StorePosition(end_reg, true, on_success); |
| 4263 RegExpNode* body_node = body->ToNode(compiler, store_end); | 4105 RegExpNode* body_node = body->ToNode(compiler, store_end); |
| 4264 return ActionNode::StorePosition(start_reg, true, body_node); | 4106 return ActionNode::StorePosition(start_reg, true, body_node); |
| 4265 } | 4107 } |
| 4266 | 4108 |
| 4267 | |
| 4268 RegExpNode* RegExpAlternative::ToNode(RegExpCompiler* compiler, | 4109 RegExpNode* RegExpAlternative::ToNode(RegExpCompiler* compiler, |
| 4269 RegExpNode* on_success) { | 4110 RegExpNode* on_success) { |
| 4270 ZoneGrowableArray<RegExpTree*>* children = nodes(); | 4111 ZoneGrowableArray<RegExpTree*>* children = nodes(); |
| 4271 RegExpNode* current = on_success; | 4112 RegExpNode* current = on_success; |
| 4272 for (intptr_t i = children->length() - 1; i >= 0; i--) { | 4113 for (intptr_t i = children->length() - 1; i >= 0; i--) { |
| 4273 current = children->At(i)->ToNode(compiler, current); | 4114 current = children->At(i)->ToNode(compiler, current); |
| 4274 } | 4115 } |
| 4275 return current; | 4116 return current; |
| 4276 } | 4117 } |
| 4277 | 4118 |
| 4278 | |
| 4279 static void AddClass(const intptr_t* elmv, | 4119 static void AddClass(const intptr_t* elmv, |
| 4280 intptr_t elmc, | 4120 intptr_t elmc, |
| 4281 ZoneGrowableArray<CharacterRange>* ranges) { | 4121 ZoneGrowableArray<CharacterRange>* ranges) { |
| 4282 elmc--; | 4122 elmc--; |
| 4283 ASSERT(elmv[elmc] == 0x10000); | 4123 ASSERT(elmv[elmc] == 0x10000); |
| 4284 for (intptr_t i = 0; i < elmc; i += 2) { | 4124 for (intptr_t i = 0; i < elmc; i += 2) { |
| 4285 ASSERT(elmv[i] < elmv[i + 1]); | 4125 ASSERT(elmv[i] < elmv[i + 1]); |
| 4286 ranges->Add(CharacterRange(elmv[i], elmv[i + 1] - 1)); | 4126 ranges->Add(CharacterRange(elmv[i], elmv[i + 1] - 1)); |
| 4287 } | 4127 } |
| 4288 } | 4128 } |
| 4289 | 4129 |
| 4290 | |
| 4291 static void AddClassNegated(const intptr_t* elmv, | 4130 static void AddClassNegated(const intptr_t* elmv, |
| 4292 intptr_t elmc, | 4131 intptr_t elmc, |
| 4293 ZoneGrowableArray<CharacterRange>* ranges) { | 4132 ZoneGrowableArray<CharacterRange>* ranges) { |
| 4294 elmc--; | 4133 elmc--; |
| 4295 ASSERT(elmv[elmc] == 0x10000); | 4134 ASSERT(elmv[elmc] == 0x10000); |
| 4296 ASSERT(elmv[0] != 0x0000); | 4135 ASSERT(elmv[0] != 0x0000); |
| 4297 ASSERT(elmv[elmc - 1] != Utf16::kMaxCodeUnit); | 4136 ASSERT(elmv[elmc - 1] != Utf16::kMaxCodeUnit); |
| 4298 uint16_t last = 0x0000; | 4137 uint16_t last = 0x0000; |
| 4299 for (intptr_t i = 0; i < elmc; i += 2) { | 4138 for (intptr_t i = 0; i < elmc; i += 2) { |
| 4300 ASSERT(last <= elmv[i] - 1); | 4139 ASSERT(last <= elmv[i] - 1); |
| 4301 ASSERT(elmv[i] < elmv[i + 1]); | 4140 ASSERT(elmv[i] < elmv[i + 1]); |
| 4302 ranges->Add(CharacterRange(last, elmv[i] - 1)); | 4141 ranges->Add(CharacterRange(last, elmv[i] - 1)); |
| 4303 last = elmv[i + 1]; | 4142 last = elmv[i + 1]; |
| 4304 } | 4143 } |
| 4305 ranges->Add(CharacterRange(last, Utf16::kMaxCodeUnit)); | 4144 ranges->Add(CharacterRange(last, Utf16::kMaxCodeUnit)); |
| 4306 } | 4145 } |
| 4307 | 4146 |
| 4308 | |
| 4309 void CharacterRange::AddClassEscape(uint16_t type, | 4147 void CharacterRange::AddClassEscape(uint16_t type, |
| 4310 ZoneGrowableArray<CharacterRange>* ranges) { | 4148 ZoneGrowableArray<CharacterRange>* ranges) { |
| 4311 switch (type) { | 4149 switch (type) { |
| 4312 case 's': | 4150 case 's': |
| 4313 AddClass(kSpaceRanges, kSpaceRangeCount, ranges); | 4151 AddClass(kSpaceRanges, kSpaceRangeCount, ranges); |
| 4314 break; | 4152 break; |
| 4315 case 'S': | 4153 case 'S': |
| 4316 AddClassNegated(kSpaceRanges, kSpaceRangeCount, ranges); | 4154 AddClassNegated(kSpaceRanges, kSpaceRangeCount, ranges); |
| 4317 break; | 4155 break; |
| 4318 case 'w': | 4156 case 'w': |
| (...skipping 20 matching lines...) Expand all Loading... |
| 4339 // This is the set of characters matched by the $ and ^ symbols | 4177 // This is the set of characters matched by the $ and ^ symbols |
| 4340 // in multiline mode. | 4178 // in multiline mode. |
| 4341 case 'n': | 4179 case 'n': |
| 4342 AddClass(kLineTerminatorRanges, kLineTerminatorRangeCount, ranges); | 4180 AddClass(kLineTerminatorRanges, kLineTerminatorRangeCount, ranges); |
| 4343 break; | 4181 break; |
| 4344 default: | 4182 default: |
| 4345 UNREACHABLE(); | 4183 UNREACHABLE(); |
| 4346 } | 4184 } |
| 4347 } | 4185 } |
| 4348 | 4186 |
| 4349 | |
| 4350 void CharacterRange::AddCaseEquivalents( | 4187 void CharacterRange::AddCaseEquivalents( |
| 4351 ZoneGrowableArray<CharacterRange>* ranges, | 4188 ZoneGrowableArray<CharacterRange>* ranges, |
| 4352 bool is_one_byte, | 4189 bool is_one_byte, |
| 4353 Zone* zone) { | 4190 Zone* zone) { |
| 4354 uint16_t bottom = from(); | 4191 uint16_t bottom = from(); |
| 4355 uint16_t top = to(); | 4192 uint16_t top = to(); |
| 4356 if (is_one_byte && !RangeContainsLatin1Equivalents(*this)) { | 4193 if (is_one_byte && !RangeContainsLatin1Equivalents(*this)) { |
| 4357 if (bottom > Symbols::kMaxOneCharCodeSymbol) return; | 4194 if (bottom > Symbols::kMaxOneCharCodeSymbol) return; |
| 4358 if (top > Symbols::kMaxOneCharCodeSymbol) { | 4195 if (top > Symbols::kMaxOneCharCodeSymbol) { |
| 4359 top = Symbols::kMaxOneCharCodeSymbol; | 4196 top = Symbols::kMaxOneCharCodeSymbol; |
| (...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4411 uint16_t range_to = c - (block_end - end); | 4248 uint16_t range_to = c - (block_end - end); |
| 4412 if (!(bottom <= range_from && range_to <= top)) { | 4249 if (!(bottom <= range_from && range_to <= top)) { |
| 4413 ranges->Add(CharacterRange(range_from, range_to)); | 4250 ranges->Add(CharacterRange(range_from, range_to)); |
| 4414 } | 4251 } |
| 4415 } | 4252 } |
| 4416 pos = end + 1; | 4253 pos = end + 1; |
| 4417 } | 4254 } |
| 4418 } | 4255 } |
| 4419 } | 4256 } |
| 4420 | 4257 |
| 4421 | |
| 4422 bool CharacterRange::IsCanonical(ZoneGrowableArray<CharacterRange>* ranges) { | 4258 bool CharacterRange::IsCanonical(ZoneGrowableArray<CharacterRange>* ranges) { |
| 4423 ASSERT(ranges != NULL); | 4259 ASSERT(ranges != NULL); |
| 4424 intptr_t n = ranges->length(); | 4260 intptr_t n = ranges->length(); |
| 4425 if (n <= 1) return true; | 4261 if (n <= 1) return true; |
| 4426 intptr_t max = ranges->At(0).to(); | 4262 intptr_t max = ranges->At(0).to(); |
| 4427 for (intptr_t i = 1; i < n; i++) { | 4263 for (intptr_t i = 1; i < n; i++) { |
| 4428 CharacterRange next_range = ranges->At(i); | 4264 CharacterRange next_range = ranges->At(i); |
| 4429 if (next_range.from() <= max + 1) return false; | 4265 if (next_range.from() <= max + 1) return false; |
| 4430 max = next_range.to(); | 4266 max = next_range.to(); |
| 4431 } | 4267 } |
| 4432 return true; | 4268 return true; |
| 4433 } | 4269 } |
| 4434 | 4270 |
| 4435 | |
| 4436 ZoneGrowableArray<CharacterRange>* CharacterSet::ranges() { | 4271 ZoneGrowableArray<CharacterRange>* CharacterSet::ranges() { |
| 4437 if (ranges_ == NULL) { | 4272 if (ranges_ == NULL) { |
| 4438 ranges_ = new ZoneGrowableArray<CharacterRange>(2); | 4273 ranges_ = new ZoneGrowableArray<CharacterRange>(2); |
| 4439 CharacterRange::AddClassEscape(standard_set_type_, ranges_); | 4274 CharacterRange::AddClassEscape(standard_set_type_, ranges_); |
| 4440 } | 4275 } |
| 4441 return ranges_; | 4276 return ranges_; |
| 4442 } | 4277 } |
| 4443 | 4278 |
| 4444 | |
| 4445 // Move a number of elements in a zone array to another position | 4279 // Move a number of elements in a zone array to another position |
| 4446 // in the same array. Handles overlapping source and target areas. | 4280 // in the same array. Handles overlapping source and target areas. |
| 4447 static void MoveRanges(ZoneGrowableArray<CharacterRange>* list, | 4281 static void MoveRanges(ZoneGrowableArray<CharacterRange>* list, |
| 4448 intptr_t from, | 4282 intptr_t from, |
| 4449 intptr_t to, | 4283 intptr_t to, |
| 4450 intptr_t count) { | 4284 intptr_t count) { |
| 4451 // Ranges are potentially overlapping. | 4285 // Ranges are potentially overlapping. |
| 4452 if (from < to) { | 4286 if (from < to) { |
| 4453 for (intptr_t i = count - 1; i >= 0; i--) { | 4287 for (intptr_t i = count - 1; i >= 0; i--) { |
| 4454 (*list)[to + i] = list->At(from + i); | 4288 (*list)[to + i] = list->At(from + i); |
| 4455 } | 4289 } |
| 4456 } else { | 4290 } else { |
| 4457 for (intptr_t i = 0; i < count; i++) { | 4291 for (intptr_t i = 0; i < count; i++) { |
| 4458 (*list)[to + i] = list->At(from + i); | 4292 (*list)[to + i] = list->At(from + i); |
| 4459 } | 4293 } |
| 4460 } | 4294 } |
| 4461 } | 4295 } |
| 4462 | 4296 |
| 4463 | |
| 4464 static intptr_t InsertRangeInCanonicalList( | 4297 static intptr_t InsertRangeInCanonicalList( |
| 4465 ZoneGrowableArray<CharacterRange>* list, | 4298 ZoneGrowableArray<CharacterRange>* list, |
| 4466 intptr_t count, | 4299 intptr_t count, |
| 4467 CharacterRange insert) { | 4300 CharacterRange insert) { |
| 4468 // Inserts a range into list[0..count[, which must be sorted | 4301 // Inserts a range into list[0..count[, which must be sorted |
| 4469 // by from value and non-overlapping and non-adjacent, using at most | 4302 // by from value and non-overlapping and non-adjacent, using at most |
| 4470 // list[0..count] for the result. Returns the number of resulting | 4303 // list[0..count] for the result. Returns the number of resulting |
| 4471 // canonicalized ranges. Inserting a range may collapse existing ranges into | 4304 // canonicalized ranges. Inserting a range may collapse existing ranges into |
| 4472 // fewer ranges, so the return value can be anything in the range 1..count+1. | 4305 // fewer ranges, so the return value can be anything in the range 1..count+1. |
| 4473 uint16_t from = insert.from(); | 4306 uint16_t from = insert.from(); |
| (...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4512 | 4345 |
| 4513 intptr_t new_from = Utils::Minimum(list->At(start_pos).from(), from); | 4346 intptr_t new_from = Utils::Minimum(list->At(start_pos).from(), from); |
| 4514 intptr_t new_to = Utils::Maximum(list->At(end_pos - 1).to(), to); | 4347 intptr_t new_to = Utils::Maximum(list->At(end_pos - 1).to(), to); |
| 4515 if (end_pos < count) { | 4348 if (end_pos < count) { |
| 4516 MoveRanges(list, end_pos, start_pos + 1, count - end_pos); | 4349 MoveRanges(list, end_pos, start_pos + 1, count - end_pos); |
| 4517 } | 4350 } |
| 4518 (*list)[start_pos] = CharacterRange(new_from, new_to); | 4351 (*list)[start_pos] = CharacterRange(new_from, new_to); |
| 4519 return count - (end_pos - start_pos) + 1; | 4352 return count - (end_pos - start_pos) + 1; |
| 4520 } | 4353 } |
| 4521 | 4354 |
| 4522 | |
| 4523 void CharacterSet::Canonicalize() { | 4355 void CharacterSet::Canonicalize() { |
| 4524 // Special/default classes are always considered canonical. The result | 4356 // Special/default classes are always considered canonical. The result |
| 4525 // of calling ranges() will be sorted. | 4357 // of calling ranges() will be sorted. |
| 4526 if (ranges_ == NULL) return; | 4358 if (ranges_ == NULL) return; |
| 4527 CharacterRange::Canonicalize(ranges_); | 4359 CharacterRange::Canonicalize(ranges_); |
| 4528 } | 4360 } |
| 4529 | 4361 |
| 4530 | |
| 4531 void CharacterRange::Canonicalize( | 4362 void CharacterRange::Canonicalize( |
| 4532 ZoneGrowableArray<CharacterRange>* character_ranges) { | 4363 ZoneGrowableArray<CharacterRange>* character_ranges) { |
| 4533 if (character_ranges->length() <= 1) return; | 4364 if (character_ranges->length() <= 1) return; |
| 4534 // Check whether ranges are already canonical (increasing, non-overlapping, | 4365 // Check whether ranges are already canonical (increasing, non-overlapping, |
| 4535 // non-adjacent). | 4366 // non-adjacent). |
| 4536 intptr_t n = character_ranges->length(); | 4367 intptr_t n = character_ranges->length(); |
| 4537 intptr_t max = character_ranges->At(0).to(); | 4368 intptr_t max = character_ranges->At(0).to(); |
| 4538 intptr_t i = 1; | 4369 intptr_t i = 1; |
| 4539 while (i < n) { | 4370 while (i < n) { |
| 4540 CharacterRange current = character_ranges->At(i); | 4371 CharacterRange current = character_ranges->At(i); |
| (...skipping 16 matching lines...) Expand all Loading... |
| 4557 do { | 4388 do { |
| 4558 num_canonical = InsertRangeInCanonicalList(character_ranges, num_canonical, | 4389 num_canonical = InsertRangeInCanonicalList(character_ranges, num_canonical, |
| 4559 character_ranges->At(read)); | 4390 character_ranges->At(read)); |
| 4560 read++; | 4391 read++; |
| 4561 } while (read < n); | 4392 } while (read < n); |
| 4562 character_ranges->TruncateTo(num_canonical); | 4393 character_ranges->TruncateTo(num_canonical); |
| 4563 | 4394 |
| 4564 ASSERT(CharacterRange::IsCanonical(character_ranges)); | 4395 ASSERT(CharacterRange::IsCanonical(character_ranges)); |
| 4565 } | 4396 } |
| 4566 | 4397 |
| 4567 | |
| 4568 void CharacterRange::Negate(ZoneGrowableArray<CharacterRange>* ranges, | 4398 void CharacterRange::Negate(ZoneGrowableArray<CharacterRange>* ranges, |
| 4569 ZoneGrowableArray<CharacterRange>* negated_ranges) { | 4399 ZoneGrowableArray<CharacterRange>* negated_ranges) { |
| 4570 ASSERT(CharacterRange::IsCanonical(ranges)); | 4400 ASSERT(CharacterRange::IsCanonical(ranges)); |
| 4571 ASSERT(negated_ranges->length() == 0); | 4401 ASSERT(negated_ranges->length() == 0); |
| 4572 intptr_t range_count = ranges->length(); | 4402 intptr_t range_count = ranges->length(); |
| 4573 uint16_t from = 0; | 4403 uint16_t from = 0; |
| 4574 intptr_t i = 0; | 4404 intptr_t i = 0; |
| 4575 if (range_count > 0 && ranges->At(0).from() == 0) { | 4405 if (range_count > 0 && ranges->At(0).from() == 0) { |
| 4576 from = ranges->At(0).to(); | 4406 from = ranges->At(0).to(); |
| 4577 i = 1; | 4407 i = 1; |
| 4578 } | 4408 } |
| 4579 while (i < range_count) { | 4409 while (i < range_count) { |
| 4580 CharacterRange range = ranges->At(i); | 4410 CharacterRange range = ranges->At(i); |
| 4581 negated_ranges->Add(CharacterRange(from + 1, range.from() - 1)); | 4411 negated_ranges->Add(CharacterRange(from + 1, range.from() - 1)); |
| 4582 from = range.to(); | 4412 from = range.to(); |
| 4583 i++; | 4413 i++; |
| 4584 } | 4414 } |
| 4585 if (from < Utf16::kMaxCodeUnit) { | 4415 if (from < Utf16::kMaxCodeUnit) { |
| 4586 negated_ranges->Add(CharacterRange(from + 1, Utf16::kMaxCodeUnit)); | 4416 negated_ranges->Add(CharacterRange(from + 1, Utf16::kMaxCodeUnit)); |
| 4587 } | 4417 } |
| 4588 } | 4418 } |
| 4589 | 4419 |
| 4590 | |
| 4591 // ------------------------------------------------------------------- | 4420 // ------------------------------------------------------------------- |
| 4592 // Splay tree | 4421 // Splay tree |
| 4593 | 4422 |
| 4594 | |
| 4595 // Workaround for the fact that ZoneGrowableArray does not have contains(). | 4423 // Workaround for the fact that ZoneGrowableArray does not have contains(). |
| 4596 static bool ArrayContains(ZoneGrowableArray<unsigned>* array, unsigned value) { | 4424 static bool ArrayContains(ZoneGrowableArray<unsigned>* array, unsigned value) { |
| 4597 for (intptr_t i = 0; i < array->length(); i++) { | 4425 for (intptr_t i = 0; i < array->length(); i++) { |
| 4598 if (array->At(i) == value) { | 4426 if (array->At(i) == value) { |
| 4599 return true; | 4427 return true; |
| 4600 } | 4428 } |
| 4601 } | 4429 } |
| 4602 return false; | 4430 return false; |
| 4603 } | 4431 } |
| 4604 | 4432 |
| 4605 | |
| 4606 void OutSet::Set(unsigned value, Zone* zone) { | 4433 void OutSet::Set(unsigned value, Zone* zone) { |
| 4607 if (value < kFirstLimit) { | 4434 if (value < kFirstLimit) { |
| 4608 first_ |= (1 << value); | 4435 first_ |= (1 << value); |
| 4609 } else { | 4436 } else { |
| 4610 if (remaining_ == NULL) | 4437 if (remaining_ == NULL) |
| 4611 remaining_ = new (zone) ZoneGrowableArray<unsigned>(1); | 4438 remaining_ = new (zone) ZoneGrowableArray<unsigned>(1); |
| 4612 | 4439 |
| 4613 bool remaining_contains_value = ArrayContains(remaining_, value); | 4440 bool remaining_contains_value = ArrayContains(remaining_, value); |
| 4614 if (remaining_->is_empty() || !remaining_contains_value) { | 4441 if (remaining_->is_empty() || !remaining_contains_value) { |
| 4615 remaining_->Add(value); | 4442 remaining_->Add(value); |
| 4616 } | 4443 } |
| 4617 } | 4444 } |
| 4618 } | 4445 } |
| 4619 | 4446 |
| 4620 | |
| 4621 bool OutSet::Get(unsigned value) const { | 4447 bool OutSet::Get(unsigned value) const { |
| 4622 if (value < kFirstLimit) { | 4448 if (value < kFirstLimit) { |
| 4623 return (first_ & (1 << value)) != 0; | 4449 return (first_ & (1 << value)) != 0; |
| 4624 } else if (remaining_ == NULL) { | 4450 } else if (remaining_ == NULL) { |
| 4625 return false; | 4451 return false; |
| 4626 } else { | 4452 } else { |
| 4627 return ArrayContains(remaining_, value); | 4453 return ArrayContains(remaining_, value); |
| 4628 } | 4454 } |
| 4629 } | 4455 } |
| 4630 | 4456 |
| 4631 | |
| 4632 // ------------------------------------------------------------------- | 4457 // ------------------------------------------------------------------- |
| 4633 // Analysis | 4458 // Analysis |
| 4634 | 4459 |
| 4635 | |
| 4636 void Analysis::EnsureAnalyzed(RegExpNode* that) { | 4460 void Analysis::EnsureAnalyzed(RegExpNode* that) { |
| 4637 if (that->info()->been_analyzed || that->info()->being_analyzed) return; | 4461 if (that->info()->been_analyzed || that->info()->being_analyzed) return; |
| 4638 that->info()->being_analyzed = true; | 4462 that->info()->being_analyzed = true; |
| 4639 that->Accept(this); | 4463 that->Accept(this); |
| 4640 that->info()->being_analyzed = false; | 4464 that->info()->being_analyzed = false; |
| 4641 that->info()->been_analyzed = true; | 4465 that->info()->been_analyzed = true; |
| 4642 } | 4466 } |
| 4643 | 4467 |
| 4644 | |
| 4645 void Analysis::VisitEnd(EndNode* that) { | 4468 void Analysis::VisitEnd(EndNode* that) { |
| 4646 // nothing to do | 4469 // nothing to do |
| 4647 } | 4470 } |
| 4648 | 4471 |
| 4649 | |
| 4650 void TextNode::CalculateOffsets() { | 4472 void TextNode::CalculateOffsets() { |
| 4651 intptr_t element_count = elements()->length(); | 4473 intptr_t element_count = elements()->length(); |
| 4652 // Set up the offsets of the elements relative to the start. This is a fixed | 4474 // Set up the offsets of the elements relative to the start. This is a fixed |
| 4653 // quantity since a TextNode can only contain fixed-width things. | 4475 // quantity since a TextNode can only contain fixed-width things. |
| 4654 intptr_t cp_offset = 0; | 4476 intptr_t cp_offset = 0; |
| 4655 for (intptr_t i = 0; i < element_count; i++) { | 4477 for (intptr_t i = 0; i < element_count; i++) { |
| 4656 TextElement& elm = (*elements())[i]; | 4478 TextElement& elm = (*elements())[i]; |
| 4657 elm.set_cp_offset(cp_offset); | 4479 elm.set_cp_offset(cp_offset); |
| 4658 cp_offset += elm.length(); | 4480 cp_offset += elm.length(); |
| 4659 } | 4481 } |
| 4660 } | 4482 } |
| 4661 | 4483 |
| 4662 | |
| 4663 void Analysis::VisitText(TextNode* that) { | 4484 void Analysis::VisitText(TextNode* that) { |
| 4664 if (ignore_case_) { | 4485 if (ignore_case_) { |
| 4665 that->MakeCaseIndependent(is_one_byte_); | 4486 that->MakeCaseIndependent(is_one_byte_); |
| 4666 } | 4487 } |
| 4667 EnsureAnalyzed(that->on_success()); | 4488 EnsureAnalyzed(that->on_success()); |
| 4668 if (!has_failed()) { | 4489 if (!has_failed()) { |
| 4669 that->CalculateOffsets(); | 4490 that->CalculateOffsets(); |
| 4670 } | 4491 } |
| 4671 } | 4492 } |
| 4672 | 4493 |
| 4673 | |
| 4674 void Analysis::VisitAction(ActionNode* that) { | 4494 void Analysis::VisitAction(ActionNode* that) { |
| 4675 RegExpNode* target = that->on_success(); | 4495 RegExpNode* target = that->on_success(); |
| 4676 EnsureAnalyzed(target); | 4496 EnsureAnalyzed(target); |
| 4677 if (!has_failed()) { | 4497 if (!has_failed()) { |
| 4678 // If the next node is interested in what it follows then this node | 4498 // If the next node is interested in what it follows then this node |
| 4679 // has to be interested too so it can pass the information on. | 4499 // has to be interested too so it can pass the information on. |
| 4680 that->info()->AddFromFollowing(target->info()); | 4500 that->info()->AddFromFollowing(target->info()); |
| 4681 } | 4501 } |
| 4682 } | 4502 } |
| 4683 | 4503 |
| 4684 | |
| 4685 void Analysis::VisitChoice(ChoiceNode* that) { | 4504 void Analysis::VisitChoice(ChoiceNode* that) { |
| 4686 NodeInfo* info = that->info(); | 4505 NodeInfo* info = that->info(); |
| 4687 for (intptr_t i = 0; i < that->alternatives()->length(); i++) { | 4506 for (intptr_t i = 0; i < that->alternatives()->length(); i++) { |
| 4688 RegExpNode* node = (*that->alternatives())[i].node(); | 4507 RegExpNode* node = (*that->alternatives())[i].node(); |
| 4689 EnsureAnalyzed(node); | 4508 EnsureAnalyzed(node); |
| 4690 if (has_failed()) return; | 4509 if (has_failed()) return; |
| 4691 // Anything the following nodes need to know has to be known by | 4510 // Anything the following nodes need to know has to be known by |
| 4692 // this node also, so it can pass it on. | 4511 // this node also, so it can pass it on. |
| 4693 info->AddFromFollowing(node->info()); | 4512 info->AddFromFollowing(node->info()); |
| 4694 } | 4513 } |
| 4695 } | 4514 } |
| 4696 | 4515 |
| 4697 | |
| 4698 void Analysis::VisitLoopChoice(LoopChoiceNode* that) { | 4516 void Analysis::VisitLoopChoice(LoopChoiceNode* that) { |
| 4699 NodeInfo* info = that->info(); | 4517 NodeInfo* info = that->info(); |
| 4700 for (intptr_t i = 0; i < that->alternatives()->length(); i++) { | 4518 for (intptr_t i = 0; i < that->alternatives()->length(); i++) { |
| 4701 RegExpNode* node = (*that->alternatives())[i].node(); | 4519 RegExpNode* node = (*that->alternatives())[i].node(); |
| 4702 if (node != that->loop_node()) { | 4520 if (node != that->loop_node()) { |
| 4703 EnsureAnalyzed(node); | 4521 EnsureAnalyzed(node); |
| 4704 if (has_failed()) return; | 4522 if (has_failed()) return; |
| 4705 info->AddFromFollowing(node->info()); | 4523 info->AddFromFollowing(node->info()); |
| 4706 } | 4524 } |
| 4707 } | 4525 } |
| 4708 // Check the loop last since it may need the value of this node | 4526 // Check the loop last since it may need the value of this node |
| 4709 // to get a correct result. | 4527 // to get a correct result. |
| 4710 EnsureAnalyzed(that->loop_node()); | 4528 EnsureAnalyzed(that->loop_node()); |
| 4711 if (!has_failed()) { | 4529 if (!has_failed()) { |
| 4712 info->AddFromFollowing(that->loop_node()->info()); | 4530 info->AddFromFollowing(that->loop_node()->info()); |
| 4713 } | 4531 } |
| 4714 } | 4532 } |
| 4715 | 4533 |
| 4716 | |
| 4717 void Analysis::VisitBackReference(BackReferenceNode* that) { | 4534 void Analysis::VisitBackReference(BackReferenceNode* that) { |
| 4718 EnsureAnalyzed(that->on_success()); | 4535 EnsureAnalyzed(that->on_success()); |
| 4719 } | 4536 } |
| 4720 | 4537 |
| 4721 | |
| 4722 void Analysis::VisitAssertion(AssertionNode* that) { | 4538 void Analysis::VisitAssertion(AssertionNode* that) { |
| 4723 EnsureAnalyzed(that->on_success()); | 4539 EnsureAnalyzed(that->on_success()); |
| 4724 } | 4540 } |
| 4725 | 4541 |
| 4726 | |
| 4727 void BackReferenceNode::FillInBMInfo(intptr_t offset, | 4542 void BackReferenceNode::FillInBMInfo(intptr_t offset, |
| 4728 intptr_t budget, | 4543 intptr_t budget, |
| 4729 BoyerMooreLookahead* bm, | 4544 BoyerMooreLookahead* bm, |
| 4730 bool not_at_start) { | 4545 bool not_at_start) { |
| 4731 // Working out the set of characters that a backreference can match is too | 4546 // Working out the set of characters that a backreference can match is too |
| 4732 // hard, so we just say that any character can match. | 4547 // hard, so we just say that any character can match. |
| 4733 bm->SetRest(offset); | 4548 bm->SetRest(offset); |
| 4734 SaveBMInfo(bm, not_at_start, offset); | 4549 SaveBMInfo(bm, not_at_start, offset); |
| 4735 } | 4550 } |
| 4736 | 4551 |
| 4737 | |
| 4738 COMPILE_ASSERT(BoyerMoorePositionInfo::kMapSize == | 4552 COMPILE_ASSERT(BoyerMoorePositionInfo::kMapSize == |
| 4739 RegExpMacroAssembler::kTableSize); | 4553 RegExpMacroAssembler::kTableSize); |
| 4740 | 4554 |
| 4741 | |
| 4742 void ChoiceNode::FillInBMInfo(intptr_t offset, | 4555 void ChoiceNode::FillInBMInfo(intptr_t offset, |
| 4743 intptr_t budget, | 4556 intptr_t budget, |
| 4744 BoyerMooreLookahead* bm, | 4557 BoyerMooreLookahead* bm, |
| 4745 bool not_at_start) { | 4558 bool not_at_start) { |
| 4746 ZoneGrowableArray<GuardedAlternative>* alts = alternatives(); | 4559 ZoneGrowableArray<GuardedAlternative>* alts = alternatives(); |
| 4747 budget = (budget - 1) / alts->length(); | 4560 budget = (budget - 1) / alts->length(); |
| 4748 for (intptr_t i = 0; i < alts->length(); i++) { | 4561 for (intptr_t i = 0; i < alts->length(); i++) { |
| 4749 GuardedAlternative& alt = (*alts)[i]; | 4562 GuardedAlternative& alt = (*alts)[i]; |
| 4750 if (alt.guards() != NULL && alt.guards()->length() != 0) { | 4563 if (alt.guards() != NULL && alt.guards()->length() != 0) { |
| 4751 bm->SetRest(offset); // Give up trying to fill in info. | 4564 bm->SetRest(offset); // Give up trying to fill in info. |
| 4752 SaveBMInfo(bm, not_at_start, offset); | 4565 SaveBMInfo(bm, not_at_start, offset); |
| 4753 return; | 4566 return; |
| 4754 } | 4567 } |
| 4755 alt.node()->FillInBMInfo(offset, budget, bm, not_at_start); | 4568 alt.node()->FillInBMInfo(offset, budget, bm, not_at_start); |
| 4756 } | 4569 } |
| 4757 SaveBMInfo(bm, not_at_start, offset); | 4570 SaveBMInfo(bm, not_at_start, offset); |
| 4758 } | 4571 } |
| 4759 | 4572 |
| 4760 | |
| 4761 void TextNode::FillInBMInfo(intptr_t initial_offset, | 4573 void TextNode::FillInBMInfo(intptr_t initial_offset, |
| 4762 intptr_t budget, | 4574 intptr_t budget, |
| 4763 BoyerMooreLookahead* bm, | 4575 BoyerMooreLookahead* bm, |
| 4764 bool not_at_start) { | 4576 bool not_at_start) { |
| 4765 if (initial_offset >= bm->length()) return; | 4577 if (initial_offset >= bm->length()) return; |
| 4766 intptr_t offset = initial_offset; | 4578 intptr_t offset = initial_offset; |
| 4767 intptr_t max_char = bm->max_char(); | 4579 intptr_t max_char = bm->max_char(); |
| 4768 for (intptr_t i = 0; i < elements()->length(); i++) { | 4580 for (intptr_t i = 0; i < elements()->length(); i++) { |
| 4769 if (offset >= bm->length()) { | 4581 if (offset >= bm->length()) { |
| 4770 if (initial_offset == 0) set_bm_info(not_at_start, bm); | 4582 if (initial_offset == 0) set_bm_info(not_at_start, bm); |
| (...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4811 } | 4623 } |
| 4812 if (offset >= bm->length()) { | 4624 if (offset >= bm->length()) { |
| 4813 if (initial_offset == 0) set_bm_info(not_at_start, bm); | 4625 if (initial_offset == 0) set_bm_info(not_at_start, bm); |
| 4814 return; | 4626 return; |
| 4815 } | 4627 } |
| 4816 on_success()->FillInBMInfo(offset, budget - 1, bm, | 4628 on_success()->FillInBMInfo(offset, budget - 1, bm, |
| 4817 true); // Not at start after a text node. | 4629 true); // Not at start after a text node. |
| 4818 if (initial_offset == 0) set_bm_info(not_at_start, bm); | 4630 if (initial_offset == 0) set_bm_info(not_at_start, bm); |
| 4819 } | 4631 } |
| 4820 | 4632 |
| 4821 | |
| 4822 #if !defined(DART_PRECOMPILED_RUNTIME) | 4633 #if !defined(DART_PRECOMPILED_RUNTIME) |
| 4823 RegExpEngine::CompilationResult RegExpEngine::CompileIR( | 4634 RegExpEngine::CompilationResult RegExpEngine::CompileIR( |
| 4824 RegExpCompileData* data, | 4635 RegExpCompileData* data, |
| 4825 const ParsedFunction* parsed_function, | 4636 const ParsedFunction* parsed_function, |
| 4826 const ZoneGrowableArray<const ICData*>& ic_data_array, | 4637 const ZoneGrowableArray<const ICData*>& ic_data_array, |
| 4827 intptr_t osr_id) { | 4638 intptr_t osr_id) { |
| 4828 ASSERT(!FLAG_interpret_irregexp); | 4639 ASSERT(!FLAG_interpret_irregexp); |
| 4829 Zone* zone = Thread::Current()->zone(); | 4640 Zone* zone = Thread::Current()->zone(); |
| 4830 | 4641 |
| 4831 const Function& function = parsed_function->function(); | 4642 const Function& function = parsed_function->function(); |
| (...skipping 91 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4923 compiler.Assemble(macro_assembler, node, data->capture_count, pattern); | 4734 compiler.Assemble(macro_assembler, node, data->capture_count, pattern); |
| 4924 | 4735 |
| 4925 if (FLAG_trace_irregexp) { | 4736 if (FLAG_trace_irregexp) { |
| 4926 macro_assembler->PrintBlocks(); | 4737 macro_assembler->PrintBlocks(); |
| 4927 } | 4738 } |
| 4928 | 4739 |
| 4929 return result; | 4740 return result; |
| 4930 } | 4741 } |
| 4931 #endif // !defined(DART_PRECOMPILED_RUNTIME) | 4742 #endif // !defined(DART_PRECOMPILED_RUNTIME) |
| 4932 | 4743 |
| 4933 | |
| 4934 RegExpEngine::CompilationResult RegExpEngine::CompileBytecode( | 4744 RegExpEngine::CompilationResult RegExpEngine::CompileBytecode( |
| 4935 RegExpCompileData* data, | 4745 RegExpCompileData* data, |
| 4936 const RegExp& regexp, | 4746 const RegExp& regexp, |
| 4937 bool is_one_byte, | 4747 bool is_one_byte, |
| 4938 bool is_sticky, | 4748 bool is_sticky, |
| 4939 Zone* zone) { | 4749 Zone* zone) { |
| 4940 ASSERT(FLAG_interpret_irregexp); | 4750 ASSERT(FLAG_interpret_irregexp); |
| 4941 const String& pattern = String::Handle(zone, regexp.pattern()); | 4751 const String& pattern = String::Handle(zone, regexp.pattern()); |
| 4942 | 4752 |
| 4943 ASSERT(!regexp.IsNull()); | 4753 ASSERT(!regexp.IsNull()); |
| (...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5026 RegExpEngine::CompilationResult result = | 4836 RegExpEngine::CompilationResult result = |
| 5027 compiler.Assemble(macro_assembler, node, data->capture_count, pattern); | 4837 compiler.Assemble(macro_assembler, node, data->capture_count, pattern); |
| 5028 | 4838 |
| 5029 if (FLAG_trace_irregexp) { | 4839 if (FLAG_trace_irregexp) { |
| 5030 macro_assembler->PrintBlocks(); | 4840 macro_assembler->PrintBlocks(); |
| 5031 } | 4841 } |
| 5032 | 4842 |
| 5033 return result; | 4843 return result; |
| 5034 } | 4844 } |
| 5035 | 4845 |
| 5036 | |
| 5037 static void CreateSpecializedFunction(Thread* thread, | 4846 static void CreateSpecializedFunction(Thread* thread, |
| 5038 Zone* zone, | 4847 Zone* zone, |
| 5039 const RegExp& regexp, | 4848 const RegExp& regexp, |
| 5040 intptr_t specialization_cid, | 4849 intptr_t specialization_cid, |
| 5041 bool sticky, | 4850 bool sticky, |
| 5042 const Object& owner) { | 4851 const Object& owner) { |
| 5043 const intptr_t kParamCount = RegExpMacroAssembler::kParamCount; | 4852 const intptr_t kParamCount = RegExpMacroAssembler::kParamCount; |
| 5044 | 4853 |
| 5045 Function& fn = | 4854 Function& fn = |
| 5046 Function::Handle(zone, Function::New(Symbols::ColonMatcher(), | 4855 Function::Handle(zone, Function::New(Symbols::ColonMatcher(), |
| (...skipping 27 matching lines...) Expand all Loading... |
| 5074 | 4883 |
| 5075 // Cache the result. | 4884 // Cache the result. |
| 5076 regexp.set_function(specialization_cid, sticky, fn); | 4885 regexp.set_function(specialization_cid, sticky, fn); |
| 5077 | 4886 |
| 5078 fn.SetRegExpData(regexp, specialization_cid, sticky); | 4887 fn.SetRegExpData(regexp, specialization_cid, sticky); |
| 5079 fn.set_is_debuggable(false); | 4888 fn.set_is_debuggable(false); |
| 5080 | 4889 |
| 5081 // The function is compiled lazily during the first call. | 4890 // The function is compiled lazily during the first call. |
| 5082 } | 4891 } |
| 5083 | 4892 |
| 5084 | |
| 5085 RawRegExp* RegExpEngine::CreateRegExp(Thread* thread, | 4893 RawRegExp* RegExpEngine::CreateRegExp(Thread* thread, |
| 5086 const String& pattern, | 4894 const String& pattern, |
| 5087 bool multi_line, | 4895 bool multi_line, |
| 5088 bool ignore_case) { | 4896 bool ignore_case) { |
| 5089 Zone* zone = thread->zone(); | 4897 Zone* zone = thread->zone(); |
| 5090 const RegExp& regexp = RegExp::Handle(RegExp::New()); | 4898 const RegExp& regexp = RegExp::Handle(RegExp::New()); |
| 5091 | 4899 |
| 5092 regexp.set_pattern(pattern); | 4900 regexp.set_pattern(pattern); |
| 5093 | 4901 |
| 5094 if (multi_line) { | 4902 if (multi_line) { |
| (...skipping 18 matching lines...) Expand all Loading... |
| 5113 CreateSpecializedFunction(thread, zone, regexp, cid, /*sticky=*/false, | 4921 CreateSpecializedFunction(thread, zone, regexp, cid, /*sticky=*/false, |
| 5114 owner); | 4922 owner); |
| 5115 CreateSpecializedFunction(thread, zone, regexp, cid, /*sticky=*/true, | 4923 CreateSpecializedFunction(thread, zone, regexp, cid, /*sticky=*/true, |
| 5116 owner); | 4924 owner); |
| 5117 } | 4925 } |
| 5118 } | 4926 } |
| 5119 | 4927 |
| 5120 return regexp.raw(); | 4928 return regexp.raw(); |
| 5121 } | 4929 } |
| 5122 | 4930 |
| 5123 | |
| 5124 } // namespace dart | 4931 } // namespace dart |
| OLD | NEW |