| OLD | NEW |
| 1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 206 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 217 Handle<String> subject, | 217 Handle<String> subject, |
| 218 int index, | 218 int index, |
| 219 Handle<JSArray> last_match_info) { | 219 Handle<JSArray> last_match_info) { |
| 220 Isolate* isolate = re->GetIsolate(); | 220 Isolate* isolate = re->GetIsolate(); |
| 221 | 221 |
| 222 ASSERT(0 <= index); | 222 ASSERT(0 <= index); |
| 223 ASSERT(index <= subject->length()); | 223 ASSERT(index <= subject->length()); |
| 224 | 224 |
| 225 if (!subject->IsFlat()) FlattenString(subject); | 225 if (!subject->IsFlat()) FlattenString(subject); |
| 226 AssertNoAllocation no_heap_allocation; // ensure vectors stay valid | 226 AssertNoAllocation no_heap_allocation; // ensure vectors stay valid |
| 227 // Extract flattened substrings of cons strings before determining asciiness. | |
| 228 | 227 |
| 229 String* needle = String::cast(re->DataAt(JSRegExp::kAtomPatternIndex)); | 228 String* needle = String::cast(re->DataAt(JSRegExp::kAtomPatternIndex)); |
| 230 int needle_len = needle->length(); | 229 int needle_len = needle->length(); |
| 231 ASSERT(needle->IsFlat()); | 230 ASSERT(needle->IsFlat()); |
| 232 | 231 |
| 233 if (needle_len != 0) { | 232 if (needle_len != 0) { |
| 234 if (index + needle_len > subject->length()) { | 233 if (index + needle_len > subject->length()) { |
| 235 return isolate->factory()->null_value(); | 234 return isolate->factory()->null_value(); |
| 236 } | 235 } |
| 237 | 236 |
| (...skipping 102 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 340 Object* error_string = re->DataAt(JSRegExp::saved_code_index(is_ascii)); | 339 Object* error_string = re->DataAt(JSRegExp::saved_code_index(is_ascii)); |
| 341 ASSERT(error_string->IsString()); | 340 ASSERT(error_string->IsString()); |
| 342 Handle<String> error_message(String::cast(error_string)); | 341 Handle<String> error_message(String::cast(error_string)); |
| 343 CreateRegExpErrorObjectAndThrow(re, is_ascii, error_message, isolate); | 342 CreateRegExpErrorObjectAndThrow(re, is_ascii, error_message, isolate); |
| 344 return false; | 343 return false; |
| 345 } | 344 } |
| 346 | 345 |
| 347 JSRegExp::Flags flags = re->GetFlags(); | 346 JSRegExp::Flags flags = re->GetFlags(); |
| 348 | 347 |
| 349 Handle<String> pattern(re->Pattern()); | 348 Handle<String> pattern(re->Pattern()); |
| 350 if (!pattern->IsFlat()) { | 349 if (!pattern->IsFlat()) FlattenString(pattern); |
| 351 FlattenString(pattern); | |
| 352 } | |
| 353 | |
| 354 RegExpCompileData compile_data; | 350 RegExpCompileData compile_data; |
| 355 FlatStringReader reader(isolate, pattern); | 351 FlatStringReader reader(isolate, pattern); |
| 356 if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(), | 352 if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(), |
| 357 &compile_data)) { | 353 &compile_data)) { |
| 358 // Throw an exception if we fail to parse the pattern. | 354 // Throw an exception if we fail to parse the pattern. |
| 359 // THIS SHOULD NOT HAPPEN. We already pre-parsed it successfully once. | 355 // THIS SHOULD NOT HAPPEN. We already pre-parsed it successfully once. |
| 360 ThrowRegExpException(re, | 356 ThrowRegExpException(re, |
| 361 pattern, | 357 pattern, |
| 362 compile_data.error, | 358 compile_data.error, |
| 363 "malformed_regexp"); | 359 "malformed_regexp"); |
| (...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 427 re->GetIsolate()->factory()->SetRegExpIrregexpData(re, | 423 re->GetIsolate()->factory()->SetRegExpIrregexpData(re, |
| 428 JSRegExp::IRREGEXP, | 424 JSRegExp::IRREGEXP, |
| 429 pattern, | 425 pattern, |
| 430 flags, | 426 flags, |
| 431 capture_count); | 427 capture_count); |
| 432 } | 428 } |
| 433 | 429 |
| 434 | 430 |
| 435 int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp, | 431 int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp, |
| 436 Handle<String> subject) { | 432 Handle<String> subject) { |
| 437 if (!subject->IsFlat()) { | 433 if (!subject->IsFlat()) FlattenString(subject); |
| 438 FlattenString(subject); | 434 |
| 439 } | |
| 440 // Check the asciiness of the underlying storage. | 435 // Check the asciiness of the underlying storage. |
| 441 bool is_ascii; | 436 bool is_ascii = subject->IsAsciiRepresentationUnderneath(); |
| 442 { | 437 if (!EnsureCompiledIrregexp(regexp, is_ascii)) return -1; |
| 443 AssertNoAllocation no_gc; | 438 |
| 444 String* sequential_string = *subject; | |
| 445 if (subject->IsConsString()) { | |
| 446 sequential_string = ConsString::cast(*subject)->first(); | |
| 447 } | |
| 448 is_ascii = sequential_string->IsAsciiRepresentation(); | |
| 449 } | |
| 450 if (!EnsureCompiledIrregexp(regexp, is_ascii)) { | |
| 451 return -1; | |
| 452 } | |
| 453 #ifdef V8_INTERPRETED_REGEXP | 439 #ifdef V8_INTERPRETED_REGEXP |
| 454 // Byte-code regexp needs space allocated for all its registers. | 440 // Byte-code regexp needs space allocated for all its registers. |
| 455 return IrregexpNumberOfRegisters(FixedArray::cast(regexp->data())); | 441 return IrregexpNumberOfRegisters(FixedArray::cast(regexp->data())); |
| 456 #else // V8_INTERPRETED_REGEXP | 442 #else // V8_INTERPRETED_REGEXP |
| 457 // Native regexp only needs room to output captures. Registers are handled | 443 // Native regexp only needs room to output captures. Registers are handled |
| 458 // internally. | 444 // internally. |
| 459 return (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2; | 445 return (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2; |
| 460 #endif // V8_INTERPRETED_REGEXP | 446 #endif // V8_INTERPRETED_REGEXP |
| 461 } | 447 } |
| 462 | 448 |
| 463 | 449 |
| 464 RegExpImpl::IrregexpResult RegExpImpl::IrregexpExecOnce( | 450 RegExpImpl::IrregexpResult RegExpImpl::IrregexpExecOnce( |
| 465 Handle<JSRegExp> regexp, | 451 Handle<JSRegExp> regexp, |
| 466 Handle<String> subject, | 452 Handle<String> subject, |
| 467 int index, | 453 int index, |
| 468 Vector<int> output) { | 454 Vector<int> output) { |
| 469 Isolate* isolate = regexp->GetIsolate(); | 455 Isolate* isolate = regexp->GetIsolate(); |
| 470 | 456 |
| 471 Handle<FixedArray> irregexp(FixedArray::cast(regexp->data()), isolate); | 457 Handle<FixedArray> irregexp(FixedArray::cast(regexp->data()), isolate); |
| 472 | 458 |
| 473 ASSERT(index >= 0); | 459 ASSERT(index >= 0); |
| 474 ASSERT(index <= subject->length()); | 460 ASSERT(index <= subject->length()); |
| 475 ASSERT(subject->IsFlat()); | 461 ASSERT(subject->IsFlat()); |
| 476 | 462 |
| 477 // A flat ASCII string might have a two-byte first part. | 463 bool is_ascii = subject->IsAsciiRepresentationUnderneath(); |
| 478 if (subject->IsConsString()) { | |
| 479 subject = Handle<String>(ConsString::cast(*subject)->first(), isolate); | |
| 480 } | |
| 481 | 464 |
| 482 #ifndef V8_INTERPRETED_REGEXP | 465 #ifndef V8_INTERPRETED_REGEXP |
| 483 ASSERT(output.length() >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2); | 466 ASSERT(output.length() >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2); |
| 484 do { | 467 do { |
| 485 bool is_ascii = subject->IsAsciiRepresentation(); | |
| 486 EnsureCompiledIrregexp(regexp, is_ascii); | 468 EnsureCompiledIrregexp(regexp, is_ascii); |
| 487 Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii), isolate); | 469 Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii), isolate); |
| 488 NativeRegExpMacroAssembler::Result res = | 470 NativeRegExpMacroAssembler::Result res = |
| 489 NativeRegExpMacroAssembler::Match(code, | 471 NativeRegExpMacroAssembler::Match(code, |
| 490 subject, | 472 subject, |
| 491 output.start(), | 473 output.start(), |
| 492 output.length(), | 474 output.length(), |
| 493 index, | 475 index, |
| 494 isolate); | 476 isolate); |
| 495 if (res != NativeRegExpMacroAssembler::RETRY) { | 477 if (res != NativeRegExpMacroAssembler::RETRY) { |
| 496 ASSERT(res != NativeRegExpMacroAssembler::EXCEPTION || | 478 ASSERT(res != NativeRegExpMacroAssembler::EXCEPTION || |
| 497 isolate->has_pending_exception()); | 479 isolate->has_pending_exception()); |
| 498 STATIC_ASSERT( | 480 STATIC_ASSERT( |
| 499 static_cast<int>(NativeRegExpMacroAssembler::SUCCESS) == RE_SUCCESS); | 481 static_cast<int>(NativeRegExpMacroAssembler::SUCCESS) == RE_SUCCESS); |
| 500 STATIC_ASSERT( | 482 STATIC_ASSERT( |
| 501 static_cast<int>(NativeRegExpMacroAssembler::FAILURE) == RE_FAILURE); | 483 static_cast<int>(NativeRegExpMacroAssembler::FAILURE) == RE_FAILURE); |
| 502 STATIC_ASSERT(static_cast<int>(NativeRegExpMacroAssembler::EXCEPTION) | 484 STATIC_ASSERT(static_cast<int>(NativeRegExpMacroAssembler::EXCEPTION) |
| 503 == RE_EXCEPTION); | 485 == RE_EXCEPTION); |
| 504 return static_cast<IrregexpResult>(res); | 486 return static_cast<IrregexpResult>(res); |
| 505 } | 487 } |
| 506 // If result is RETRY, the string has changed representation, and we | 488 // If result is RETRY, the string has changed representation, and we |
| 507 // must restart from scratch. | 489 // must restart from scratch. |
| 508 // In this case, it means we must make sure we are prepared to handle | 490 // In this case, it means we must make sure we are prepared to handle |
| 509 // the, potentially, different subject (the string can switch between | 491 // the, potentially, different subject (the string can switch between |
| 510 // being internal and external, and even between being ASCII and UC16, | 492 // being internal and external, and even between being ASCII and UC16, |
| 511 // but the characters are always the same). | 493 // but the characters are always the same). |
| 512 IrregexpPrepare(regexp, subject); | 494 IrregexpPrepare(regexp, subject); |
| 495 is_ascii = subject->IsAsciiRepresentationUnderneath(); |
| 513 } while (true); | 496 } while (true); |
| 514 UNREACHABLE(); | 497 UNREACHABLE(); |
| 515 return RE_EXCEPTION; | 498 return RE_EXCEPTION; |
| 516 #else // V8_INTERPRETED_REGEXP | 499 #else // V8_INTERPRETED_REGEXP |
| 517 | 500 |
| 518 ASSERT(output.length() >= IrregexpNumberOfRegisters(*irregexp)); | 501 ASSERT(output.length() >= IrregexpNumberOfRegisters(*irregexp)); |
| 519 bool is_ascii = subject->IsAsciiRepresentation(); | |
| 520 // We must have done EnsureCompiledIrregexp, so we can get the number of | 502 // We must have done EnsureCompiledIrregexp, so we can get the number of |
| 521 // registers. | 503 // registers. |
| 522 int* register_vector = output.start(); | 504 int* register_vector = output.start(); |
| 523 int number_of_capture_registers = | 505 int number_of_capture_registers = |
| 524 (IrregexpNumberOfCaptures(*irregexp) + 1) * 2; | 506 (IrregexpNumberOfCaptures(*irregexp) + 1) * 2; |
| 525 for (int i = number_of_capture_registers - 1; i >= 0; i--) { | 507 for (int i = number_of_capture_registers - 1; i >= 0; i--) { |
| 526 register_vector[i] = -1; | 508 register_vector[i] = -1; |
| 527 } | 509 } |
| 528 Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_ascii), isolate); | 510 Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_ascii), isolate); |
| 529 | 511 |
| (...skipping 2142 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2672 } else { | 2654 } else { |
| 2673 return elm.cp_offset + elm.data.u_atom->data().length(); | 2655 return elm.cp_offset + elm.data.u_atom->data().length(); |
| 2674 } | 2656 } |
| 2675 } | 2657 } |
| 2676 | 2658 |
| 2677 | 2659 |
| 2678 // Finds the fixed match length of a sequence of nodes that goes from | 2660 // Finds the fixed match length of a sequence of nodes that goes from |
| 2679 // this alternative and back to this choice node. If there are variable | 2661 // this alternative and back to this choice node. If there are variable |
| 2680 // length nodes or other complications in the way then return a sentinel | 2662 // length nodes or other complications in the way then return a sentinel |
| 2681 // value indicating that a greedy loop cannot be constructed. | 2663 // value indicating that a greedy loop cannot be constructed. |
| 2682 int ChoiceNode::GreedyLoopTextLength(GuardedAlternative* alternative) { | 2664 int ChoiceNode::GreedyLoopTextLengthForAlternative( |
| 2665 GuardedAlternative* alternative) { |
| 2683 int length = 0; | 2666 int length = 0; |
| 2684 RegExpNode* node = alternative->node(); | 2667 RegExpNode* node = alternative->node(); |
| 2685 // Later we will generate code for all these text nodes using recursion | 2668 // Later we will generate code for all these text nodes using recursion |
| 2686 // so we have to limit the max number. | 2669 // so we have to limit the max number. |
| 2687 int recursion_depth = 0; | 2670 int recursion_depth = 0; |
| 2688 while (node != this) { | 2671 while (node != this) { |
| 2689 if (recursion_depth++ > RegExpCompiler::kMaxRecursion) { | 2672 if (recursion_depth++ > RegExpCompiler::kMaxRecursion) { |
| 2690 return kNodeIsTooComplexForGreedyLoops; | 2673 return kNodeIsTooComplexForGreedyLoops; |
| 2691 } | 2674 } |
| 2692 int node_length = node->GreedyLoopTextLength(); | 2675 int node_length = node->GreedyLoopTextLength(); |
| (...skipping 18 matching lines...) Expand all Loading... |
| 2711 void LoopChoiceNode::AddContinueAlternative(GuardedAlternative alt) { | 2694 void LoopChoiceNode::AddContinueAlternative(GuardedAlternative alt) { |
| 2712 ASSERT_EQ(continue_node_, NULL); | 2695 ASSERT_EQ(continue_node_, NULL); |
| 2713 AddAlternative(alt); | 2696 AddAlternative(alt); |
| 2714 continue_node_ = alt.node(); | 2697 continue_node_ = alt.node(); |
| 2715 } | 2698 } |
| 2716 | 2699 |
| 2717 | 2700 |
| 2718 void LoopChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) { | 2701 void LoopChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) { |
| 2719 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); | 2702 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); |
| 2720 if (trace->stop_node() == this) { | 2703 if (trace->stop_node() == this) { |
| 2721 int text_length = GreedyLoopTextLength(&(alternatives_->at(0))); | 2704 int text_length = |
| 2705 GreedyLoopTextLengthForAlternative(&(alternatives_->at(0))); |
| 2722 ASSERT(text_length != kNodeIsTooComplexForGreedyLoops); | 2706 ASSERT(text_length != kNodeIsTooComplexForGreedyLoops); |
| 2723 // Update the counter-based backtracking info on the stack. This is an | 2707 // Update the counter-based backtracking info on the stack. This is an |
| 2724 // optimization for greedy loops (see below). | 2708 // optimization for greedy loops (see below). |
| 2725 ASSERT(trace->cp_offset() == text_length); | 2709 ASSERT(trace->cp_offset() == text_length); |
| 2726 macro_assembler->AdvanceCurrentPosition(text_length); | 2710 macro_assembler->AdvanceCurrentPosition(text_length); |
| 2727 macro_assembler->GoTo(trace->loop_label()); | 2711 macro_assembler->GoTo(trace->loop_label()); |
| 2728 return; | 2712 return; |
| 2729 } | 2713 } |
| 2730 ASSERT(trace->stop_node() == NULL); | 2714 ASSERT(trace->stop_node() == NULL); |
| 2731 if (!trace->is_trivial()) { | 2715 if (!trace->is_trivial()) { |
| (...skipping 172 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2904 int new_flush_budget = trace->flush_budget() / choice_count; | 2888 int new_flush_budget = trace->flush_budget() / choice_count; |
| 2905 if (trace->flush_budget() == 0 && trace->actions() != NULL) { | 2889 if (trace->flush_budget() == 0 && trace->actions() != NULL) { |
| 2906 trace->Flush(compiler, this); | 2890 trace->Flush(compiler, this); |
| 2907 return; | 2891 return; |
| 2908 } | 2892 } |
| 2909 | 2893 |
| 2910 RecursionCheck rc(compiler); | 2894 RecursionCheck rc(compiler); |
| 2911 | 2895 |
| 2912 Trace* current_trace = trace; | 2896 Trace* current_trace = trace; |
| 2913 | 2897 |
| 2914 int text_length = GreedyLoopTextLength(&(alternatives_->at(0))); | 2898 int text_length = GreedyLoopTextLengthForAlternative(&(alternatives_->at(0))); |
| 2915 bool greedy_loop = false; | 2899 bool greedy_loop = false; |
| 2916 Label greedy_loop_label; | 2900 Label greedy_loop_label; |
| 2917 Trace counter_backtrack_trace; | 2901 Trace counter_backtrack_trace; |
| 2918 counter_backtrack_trace.set_backtrack(&greedy_loop_label); | 2902 counter_backtrack_trace.set_backtrack(&greedy_loop_label); |
| 2919 if (not_at_start()) counter_backtrack_trace.set_at_start(false); | 2903 if (not_at_start()) counter_backtrack_trace.set_at_start(false); |
| 2920 | 2904 |
| 2921 if (choice_count > 1 && text_length != kNodeIsTooComplexForGreedyLoops) { | 2905 if (choice_count > 1 && text_length != kNodeIsTooComplexForGreedyLoops) { |
| 2922 // Here we have special handling for greedy loops containing only text nodes | 2906 // Here we have special handling for greedy loops containing only text nodes |
| 2923 // and other simple nodes. These are handled by pushing the current | 2907 // and other simple nodes. These are handled by pushing the current |
| 2924 // position on the stack and then incrementing the current position each | 2908 // position on the stack and then incrementing the current position each |
| (...skipping 2424 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5349 } | 5333 } |
| 5350 | 5334 |
| 5351 return compiler.Assemble(¯o_assembler, | 5335 return compiler.Assemble(¯o_assembler, |
| 5352 node, | 5336 node, |
| 5353 data->capture_count, | 5337 data->capture_count, |
| 5354 pattern); | 5338 pattern); |
| 5355 } | 5339 } |
| 5356 | 5340 |
| 5357 | 5341 |
| 5358 }} // namespace v8::internal | 5342 }} // namespace v8::internal |
| OLD | NEW |