OLD | NEW |
---|---|
1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 260 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
271 int to) { | 271 int to) { |
272 NoHandleAllocation no_handles; | 272 NoHandleAllocation no_handles; |
273 RegExpImpl::SetLastCaptureCount(array, 2); | 273 RegExpImpl::SetLastCaptureCount(array, 2); |
274 RegExpImpl::SetLastSubject(array, subject); | 274 RegExpImpl::SetLastSubject(array, subject); |
275 RegExpImpl::SetLastInput(array, subject); | 275 RegExpImpl::SetLastInput(array, subject); |
276 RegExpImpl::SetCapture(array, 0, from); | 276 RegExpImpl::SetCapture(array, 0, from); |
277 RegExpImpl::SetCapture(array, 1, to); | 277 RegExpImpl::SetCapture(array, 1, to); |
278 } | 278 } |
279 | 279 |
280 | 280 |
281 Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re, | 281 int RegExpImpl::AtomExecRaw(Handle<JSRegExp> regexp, |
282 Handle<String> subject, | 282 Handle<String> subject, |
283 int index, | 283 int index, |
284 Handle<JSArray> last_match_info) { | 284 int32_t* output, |
285 Isolate* isolate = re->GetIsolate(); | 285 int output_size) { |
286 Isolate* isolate = regexp->GetIsolate(); | |
286 | 287 |
287 ASSERT(0 <= index); | 288 ASSERT(0 <= index); |
288 ASSERT(index <= subject->length()); | 289 ASSERT(index <= subject->length()); |
289 | 290 |
290 if (!subject->IsFlat()) FlattenString(subject); | 291 if (!subject->IsFlat()) FlattenString(subject); |
291 AssertNoAllocation no_heap_allocation; // ensure vectors stay valid | 292 AssertNoAllocation no_heap_allocation; // ensure vectors stay valid |
292 | 293 |
293 String* needle = String::cast(re->DataAt(JSRegExp::kAtomPatternIndex)); | 294 String* needle = String::cast(regexp->DataAt(JSRegExp::kAtomPatternIndex)); |
294 int needle_len = needle->length(); | 295 int needle_len = needle->length(); |
295 ASSERT(needle->IsFlat()); | 296 ASSERT(needle->IsFlat()); |
297 ASSERT_LT(0, needle_len); | |
296 | 298 |
297 if (needle_len != 0) { | 299 if (index + needle_len > subject->length()) { |
298 if (index + needle_len > subject->length()) { | 300 return RegExpImpl::RE_FAILURE; |
299 return isolate->factory()->null_value(); | 301 } |
300 } | |
301 | 302 |
303 for (int i = 0; i < output_size; i += 2) { | |
302 String::FlatContent needle_content = needle->GetFlatContent(); | 304 String::FlatContent needle_content = needle->GetFlatContent(); |
303 String::FlatContent subject_content = subject->GetFlatContent(); | 305 String::FlatContent subject_content = subject->GetFlatContent(); |
304 ASSERT(needle_content.IsFlat()); | 306 ASSERT(needle_content.IsFlat()); |
305 ASSERT(subject_content.IsFlat()); | 307 ASSERT(subject_content.IsFlat()); |
306 // dispatch on type of strings | 308 // dispatch on type of strings |
307 index = (needle_content.IsAscii() | 309 index = (needle_content.IsAscii() |
308 ? (subject_content.IsAscii() | 310 ? (subject_content.IsAscii() |
309 ? SearchString(isolate, | 311 ? SearchString(isolate, |
310 subject_content.ToAsciiVector(), | 312 subject_content.ToAsciiVector(), |
311 needle_content.ToAsciiVector(), | 313 needle_content.ToAsciiVector(), |
312 index) | 314 index) |
313 : SearchString(isolate, | 315 : SearchString(isolate, |
314 subject_content.ToUC16Vector(), | 316 subject_content.ToUC16Vector(), |
315 needle_content.ToAsciiVector(), | 317 needle_content.ToAsciiVector(), |
316 index)) | 318 index)) |
317 : (subject_content.IsAscii() | 319 : (subject_content.IsAscii() |
318 ? SearchString(isolate, | 320 ? SearchString(isolate, |
319 subject_content.ToAsciiVector(), | 321 subject_content.ToAsciiVector(), |
320 needle_content.ToUC16Vector(), | 322 needle_content.ToUC16Vector(), |
321 index) | 323 index) |
322 : SearchString(isolate, | 324 : SearchString(isolate, |
323 subject_content.ToUC16Vector(), | 325 subject_content.ToUC16Vector(), |
324 needle_content.ToUC16Vector(), | 326 needle_content.ToUC16Vector(), |
325 index))); | 327 index))); |
326 if (index == -1) return isolate->factory()->null_value(); | 328 if (index == -1) { |
329 return i / 2; // Return number of matches. | |
330 } else { | |
331 output[i] = index; | |
332 output[i+1] = index + needle_len; | |
333 index += needle_len; | |
334 } | |
327 } | 335 } |
328 ASSERT(last_match_info->HasFastObjectElements()); | 336 return output_size / 2; |
337 } | |
329 | 338 |
330 { | 339 |
331 NoHandleAllocation no_handles; | 340 Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re, |
332 FixedArray* array = FixedArray::cast(last_match_info->elements()); | 341 Handle<String> subject, |
333 SetAtomLastCapture(array, *subject, index, index + needle_len); | 342 int index, |
334 } | 343 Handle<JSArray> last_match_info) { |
344 Isolate* isolate = re->GetIsolate(); | |
345 | |
346 static const int kNumRegisters = 2; | |
347 STATIC_ASSERT(kNumRegisters <= Isolate::kJSRegexpStaticOffsetsVectorSize); | |
348 int32_t* output_registers = isolate->jsregexp_static_offsets_vector(); | |
349 | |
350 int res = AtomExecRaw(re, subject, index, output_registers, kNumRegisters); | |
351 | |
352 if (res == RegExpImpl::RE_FAILURE) return isolate->factory()->null_value(); | |
353 | |
354 ASSERT_EQ(res, RegExpImpl::RE_SUCCESS); | |
355 NoHandleAllocation no_handles; | |
356 FixedArray* array = FixedArray::cast(last_match_info->elements()); | |
357 SetAtomLastCapture(array, *subject, output_registers[0], output_registers[1]); | |
335 return last_match_info; | 358 return last_match_info; |
336 } | 359 } |
337 | 360 |
338 | 361 |
339 // Irregexp implementation. | 362 // Irregexp implementation. |
340 | 363 |
341 // Ensures that the regexp object contains a compiled version of the | 364 // Ensures that the regexp object contains a compiled version of the |
342 // source for either ASCII or non-ASCII strings. | 365 // source for either ASCII or non-ASCII strings. |
343 // If the compiled version doesn't already exist, it is compiled | 366 // If the compiled version doesn't already exist, it is compiled |
344 // from the source pattern. | 367 // from the source pattern. |
(...skipping 159 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
504 int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp, | 527 int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp, |
505 Handle<String> subject) { | 528 Handle<String> subject) { |
506 if (!subject->IsFlat()) FlattenString(subject); | 529 if (!subject->IsFlat()) FlattenString(subject); |
507 | 530 |
508 // Check the asciiness of the underlying storage. | 531 // Check the asciiness of the underlying storage. |
509 bool is_ascii = subject->IsAsciiRepresentationUnderneath(); | 532 bool is_ascii = subject->IsAsciiRepresentationUnderneath(); |
510 if (!EnsureCompiledIrregexp(regexp, subject, is_ascii)) return -1; | 533 if (!EnsureCompiledIrregexp(regexp, subject, is_ascii)) return -1; |
511 | 534 |
512 #ifdef V8_INTERPRETED_REGEXP | 535 #ifdef V8_INTERPRETED_REGEXP |
513 // Byte-code regexp needs space allocated for all its registers. | 536 // Byte-code regexp needs space allocated for all its registers. |
514 return IrregexpNumberOfRegisters(FixedArray::cast(regexp->data())); | 537 // The result captures are copied to the start of the registers array |
538 // if the match succeeds. This way those registers are not clobbered | |
539 // when we set the last match info from last successful match. | |
540 return IrregexpNumberOfRegisters(FixedArray::cast(regexp->data())) + | |
541 (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2; | |
515 #else // V8_INTERPRETED_REGEXP | 542 #else // V8_INTERPRETED_REGEXP |
516 // Native regexp only needs room to output captures. Registers are handled | 543 // Native regexp only needs room to output captures. Registers are handled |
517 // internally. | 544 // internally. |
518 return (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2; | 545 return (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2; |
519 #endif // V8_INTERPRETED_REGEXP | 546 #endif // V8_INTERPRETED_REGEXP |
520 } | 547 } |
521 | 548 |
522 | 549 |
523 int RegExpImpl::GlobalOffsetsVectorSize(Handle<JSRegExp> regexp, | 550 int RegExpImpl::IrregexpExecRaw(Handle<JSRegExp> regexp, |
524 int registers_per_match, | 551 Handle<String> subject, |
525 int* max_matches) { | 552 int index, |
526 #ifdef V8_INTERPRETED_REGEXP | 553 int32_t* output, |
527 // Global loop in interpreted regexp is not implemented. Therefore we choose | 554 int output_size) { |
528 // the size of the offsets vector so that it can only store one match. | |
529 *max_matches = 1; | |
530 return registers_per_match; | |
531 #else // V8_INTERPRETED_REGEXP | |
532 int size = Max(registers_per_match, OffsetsVector::kStaticOffsetsVectorSize); | |
533 *max_matches = size / registers_per_match; | |
534 return size; | |
535 #endif // V8_INTERPRETED_REGEXP | |
536 } | |
537 | |
538 | |
539 int RegExpImpl::IrregexpExecRaw( | |
540 Handle<JSRegExp> regexp, | |
541 Handle<String> subject, | |
542 int index, | |
543 Vector<int> output) { | |
544 Isolate* isolate = regexp->GetIsolate(); | 555 Isolate* isolate = regexp->GetIsolate(); |
545 | 556 |
546 Handle<FixedArray> irregexp(FixedArray::cast(regexp->data()), isolate); | 557 Handle<FixedArray> irregexp(FixedArray::cast(regexp->data()), isolate); |
547 | 558 |
548 ASSERT(index >= 0); | 559 ASSERT(index >= 0); |
549 ASSERT(index <= subject->length()); | 560 ASSERT(index <= subject->length()); |
550 ASSERT(subject->IsFlat()); | 561 ASSERT(subject->IsFlat()); |
551 | 562 |
552 bool is_ascii = subject->IsAsciiRepresentationUnderneath(); | 563 bool is_ascii = subject->IsAsciiRepresentationUnderneath(); |
553 | 564 |
554 #ifndef V8_INTERPRETED_REGEXP | 565 #ifndef V8_INTERPRETED_REGEXP |
555 ASSERT(output.length() >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2); | 566 ASSERT(output_size >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2); |
556 do { | 567 do { |
557 EnsureCompiledIrregexp(regexp, subject, is_ascii); | 568 EnsureCompiledIrregexp(regexp, subject, is_ascii); |
558 Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii), isolate); | 569 Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii), isolate); |
570 // The stack is used to allocate registers for the compiled regexp code. | |
571 // This means that in case of failure, the output registers array is left | |
572 // untouched and contains the capture results from the previous successful | |
573 // match. We can use that to set the last match info lazily. | |
559 NativeRegExpMacroAssembler::Result res = | 574 NativeRegExpMacroAssembler::Result res = |
560 NativeRegExpMacroAssembler::Match(code, | 575 NativeRegExpMacroAssembler::Match(code, |
561 subject, | 576 subject, |
562 output.start(), | 577 output, |
563 output.length(), | 578 output_size, |
564 index, | 579 index, |
565 isolate); | 580 isolate); |
566 if (res != NativeRegExpMacroAssembler::RETRY) { | 581 if (res != NativeRegExpMacroAssembler::RETRY) { |
567 ASSERT(res != NativeRegExpMacroAssembler::EXCEPTION || | 582 ASSERT(res != NativeRegExpMacroAssembler::EXCEPTION || |
568 isolate->has_pending_exception()); | 583 isolate->has_pending_exception()); |
569 STATIC_ASSERT( | 584 STATIC_ASSERT( |
570 static_cast<int>(NativeRegExpMacroAssembler::SUCCESS) == RE_SUCCESS); | 585 static_cast<int>(NativeRegExpMacroAssembler::SUCCESS) == RE_SUCCESS); |
571 STATIC_ASSERT( | 586 STATIC_ASSERT( |
572 static_cast<int>(NativeRegExpMacroAssembler::FAILURE) == RE_FAILURE); | 587 static_cast<int>(NativeRegExpMacroAssembler::FAILURE) == RE_FAILURE); |
573 STATIC_ASSERT(static_cast<int>(NativeRegExpMacroAssembler::EXCEPTION) | 588 STATIC_ASSERT(static_cast<int>(NativeRegExpMacroAssembler::EXCEPTION) |
574 == RE_EXCEPTION); | 589 == RE_EXCEPTION); |
575 return static_cast<IrregexpResult>(res); | 590 return static_cast<IrregexpResult>(res); |
576 } | 591 } |
577 // If result is RETRY, the string has changed representation, and we | 592 // If result is RETRY, the string has changed representation, and we |
578 // must restart from scratch. | 593 // must restart from scratch. |
579 // In this case, it means we must make sure we are prepared to handle | 594 // In this case, it means we must make sure we are prepared to handle |
580 // the, potentially, different subject (the string can switch between | 595 // the, potentially, different subject (the string can switch between |
581 // being internal and external, and even between being ASCII and UC16, | 596 // being internal and external, and even between being ASCII and UC16, |
582 // but the characters are always the same). | 597 // but the characters are always the same). |
583 IrregexpPrepare(regexp, subject); | 598 IrregexpPrepare(regexp, subject); |
584 is_ascii = subject->IsAsciiRepresentationUnderneath(); | 599 is_ascii = subject->IsAsciiRepresentationUnderneath(); |
585 } while (true); | 600 } while (true); |
586 UNREACHABLE(); | 601 UNREACHABLE(); |
587 return RE_EXCEPTION; | 602 return RE_EXCEPTION; |
588 #else // V8_INTERPRETED_REGEXP | 603 #else // V8_INTERPRETED_REGEXP |
589 | 604 |
590 ASSERT(output.length() >= IrregexpNumberOfRegisters(*irregexp)); | 605 ASSERT(output_size >= IrregexpNumberOfRegisters(*irregexp)); |
591 // We must have done EnsureCompiledIrregexp, so we can get the number of | 606 // We must have done EnsureCompiledIrregexp, so we can get the number of |
592 // registers. | 607 // registers. |
593 int* register_vector = output.start(); | |
594 int number_of_capture_registers = | 608 int number_of_capture_registers = |
595 (IrregexpNumberOfCaptures(*irregexp) + 1) * 2; | 609 (IrregexpNumberOfCaptures(*irregexp) + 1) * 2; |
610 int32_t* raw_output = &output[number_of_capture_registers]; | |
611 // We do not touch the actual capture result registers until we know there | |
612 // has been a match so that we can use those capture results to set the | |
613 // last match info. | |
596 for (int i = number_of_capture_registers - 1; i >= 0; i--) { | 614 for (int i = number_of_capture_registers - 1; i >= 0; i--) { |
597 register_vector[i] = -1; | 615 raw_output[i] = -1; |
598 } | 616 } |
599 Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_ascii), isolate); | 617 Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_ascii), isolate); |
600 | 618 |
601 IrregexpResult result = IrregexpInterpreter::Match(isolate, | 619 IrregexpResult result = IrregexpInterpreter::Match(isolate, |
602 byte_codes, | 620 byte_codes, |
603 subject, | 621 subject, |
604 register_vector, | 622 raw_output, |
605 index); | 623 index); |
624 if (result == RE_SUCCESS) { | |
625 // Copy capture results to the start of the registers array. | |
626 memcpy(output, raw_output, number_of_capture_registers * sizeof(int32_t)); | |
627 } | |
606 if (result == RE_EXCEPTION) { | 628 if (result == RE_EXCEPTION) { |
607 ASSERT(!isolate->has_pending_exception()); | 629 ASSERT(!isolate->has_pending_exception()); |
608 isolate->StackOverflow(); | 630 isolate->StackOverflow(); |
609 } | 631 } |
610 return result; | 632 return result; |
611 #endif // V8_INTERPRETED_REGEXP | 633 #endif // V8_INTERPRETED_REGEXP |
612 } | 634 } |
613 | 635 |
614 | 636 |
615 Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> jsregexp, | 637 Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> regexp, |
616 Handle<String> subject, | 638 Handle<String> subject, |
617 int previous_index, | 639 int previous_index, |
618 Handle<JSArray> last_match_info) { | 640 Handle<JSArray> last_match_info) { |
619 Isolate* isolate = jsregexp->GetIsolate(); | 641 Isolate* isolate = regexp->GetIsolate(); |
620 ASSERT_EQ(jsregexp->TypeTag(), JSRegExp::IRREGEXP); | 642 ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP); |
621 | 643 |
622 // Prepare space for the return values. | 644 // Prepare space for the return values. |
623 #ifdef V8_INTERPRETED_REGEXP | 645 #if defined(V8_INTERPRETED_REGEXP) && defined(DEBUG) |
624 #ifdef DEBUG | |
625 if (FLAG_trace_regexp_bytecodes) { | 646 if (FLAG_trace_regexp_bytecodes) { |
626 String* pattern = jsregexp->Pattern(); | 647 String* pattern = regexp->Pattern(); |
627 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); | 648 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); |
628 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); | 649 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); |
629 } | 650 } |
630 #endif | 651 #endif |
631 #endif | 652 int required_registers = RegExpImpl::IrregexpPrepare(regexp, subject); |
632 int required_registers = RegExpImpl::IrregexpPrepare(jsregexp, subject); | |
633 if (required_registers < 0) { | 653 if (required_registers < 0) { |
634 // Compiling failed with an exception. | 654 // Compiling failed with an exception. |
635 ASSERT(isolate->has_pending_exception()); | 655 ASSERT(isolate->has_pending_exception()); |
636 return Handle<Object>::null(); | 656 return Handle<Object>::null(); |
637 } | 657 } |
638 | 658 |
639 OffsetsVector registers(required_registers, isolate); | 659 int32_t* output_registers; |
660 if (required_registers > Isolate::kJSRegexpStaticOffsetsVectorSize) { | |
661 output_registers = NewArray<int32_t>(required_registers); | |
ulan
2012/08/03 11:58:05
Where does this array get released?
| |
662 } else { | |
663 output_registers = isolate->jsregexp_static_offsets_vector(); | |
664 } | |
640 | 665 |
641 int res = RegExpImpl::IrregexpExecRaw(jsregexp, subject, previous_index, | 666 int res = RegExpImpl::IrregexpExecRaw( |
642 Vector<int>(registers.vector(), | 667 regexp, subject, previous_index, output_registers, required_registers); |
643 registers.length())); | |
644 if (res == RE_SUCCESS) { | 668 if (res == RE_SUCCESS) { |
645 int capture_register_count = | 669 int capture_count = |
646 (IrregexpNumberOfCaptures(FixedArray::cast(jsregexp->data())) + 1) * 2; | 670 IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())); |
647 last_match_info->EnsureSize(capture_register_count + kLastMatchOverhead); | 671 return SetLastMatchInfo( |
648 AssertNoAllocation no_gc; | 672 last_match_info, subject, capture_count, output_registers); |
649 int* register_vector = registers.vector(); | |
650 FixedArray* array = FixedArray::cast(last_match_info->elements()); | |
651 for (int i = 0; i < capture_register_count; i += 2) { | |
652 SetCapture(array, i, register_vector[i]); | |
653 SetCapture(array, i + 1, register_vector[i + 1]); | |
654 } | |
655 SetLastCaptureCount(array, capture_register_count); | |
656 SetLastSubject(array, *subject); | |
657 SetLastInput(array, *subject); | |
658 return last_match_info; | |
659 } | 673 } |
660 if (res == RE_EXCEPTION) { | 674 if (res == RE_EXCEPTION) { |
661 ASSERT(isolate->has_pending_exception()); | 675 ASSERT(isolate->has_pending_exception()); |
662 return Handle<Object>::null(); | 676 return Handle<Object>::null(); |
663 } | 677 } |
664 ASSERT(res == RE_FAILURE); | 678 ASSERT(res == RE_FAILURE); |
665 return isolate->factory()->null_value(); | 679 return isolate->factory()->null_value(); |
666 } | 680 } |
667 | 681 |
668 | 682 |
683 Handle<JSArray> RegExpImpl::SetLastMatchInfo(Handle<JSArray> last_match_info, | |
684 Handle<String> subject, | |
685 int capture_count, | |
686 int32_t* match) { | |
687 int capture_register_count = (capture_count + 1) * 2; | |
688 last_match_info->EnsureSize(capture_register_count + kLastMatchOverhead); | |
689 AssertNoAllocation no_gc; | |
690 FixedArray* array = FixedArray::cast(last_match_info->elements()); | |
691 if (match != NULL) { | |
692 for (int i = 0; i < capture_register_count; i += 2) { | |
693 SetCapture(array, i, match[i]); | |
694 SetCapture(array, i + 1, match[i + 1]); | |
695 } | |
696 } | |
697 SetLastCaptureCount(array, capture_register_count); | |
698 SetLastSubject(array, *subject); | |
699 SetLastInput(array, *subject); | |
700 return last_match_info; | |
701 } | |
702 | |
703 | |
704 RegExpImpl::GlobalCache::GlobalCache(Handle<JSRegExp> regexp, | |
705 Handle<String> subject, | |
706 bool is_global, | |
707 Isolate* isolate) { | |
708 #ifdef V8_INTERPRETED_REGEXP | |
709 bool interpreted = true; | |
710 #else | |
711 bool interpreted = false; | |
712 #endif // V8_INTERPRETED_REGEXP | |
713 | |
714 regexp_ = regexp; | |
715 subject_ = subject; | |
716 | |
717 if (regexp_->TypeTag() == JSRegExp::ATOM) { | |
718 static const int kAtomRegistersPerMatch = 2; | |
719 registers_per_match_ = kAtomRegistersPerMatch; | |
720 // There is no distinction between interpreted and native for atom regexps. | |
721 interpreted = false; | |
722 } else { | |
723 registers_per_match_ = RegExpImpl::IrregexpPrepare(regexp_, subject_); | |
724 if (registers_per_match_ < 0) { | |
725 num_matches_ = -1; // Signal exception. | |
726 return; | |
727 } | |
728 } | |
729 | |
730 if (is_global && !interpreted) { | |
731 register_array_size_ = | |
732 Max(registers_per_match_, Isolate::kJSRegexpStaticOffsetsVectorSize); | |
733 max_matches_ = register_array_size_ / registers_per_match_; | |
734 } else { | |
735 // Global loop in interpreted regexp is not implemented. We choose | |
736 // the size of the offsets vector so that it can only store one match. | |
737 register_array_size_ = registers_per_match_; | |
738 max_matches_ = 1; | |
739 } | |
740 | |
741 if (register_array_size_ > Isolate::kJSRegexpStaticOffsetsVectorSize) { | |
742 register_array_ = NewArray<int32_t>(register_array_size_); | |
743 } else { | |
744 register_array_ = isolate->jsregexp_static_offsets_vector(); | |
745 } | |
746 | |
747 // Set state so that fetching the results the first time triggers a call | |
748 // to the compiled regexp. | |
749 current_match_index_ = max_matches_; | |
750 num_matches_ = max_matches_; | |
751 int32_t* last_match = | |
752 &register_array_[register_array_size_ - registers_per_match_]; | |
753 last_match[0] = -1; | |
ulan
2012/08/03 11:58:05
This assumes that registers_per_match_ >= 2. If th
Yang
2012/08/03 12:58:22
Registers generally don't need initializing. I ini
| |
754 last_match[1] = 0; | |
755 } | |
756 | |
757 | |
758 RegExpImpl::GlobalCache::~GlobalCache() { | |
759 // Deallocate the register array if we allocated it in the constructor | |
760 // (as opposed to using the existing jsregexp_static_offsets_vector). | |
761 if (register_array_size_ > Isolate::kJSRegexpStaticOffsetsVectorSize) { | |
762 DeleteArray(register_array_); | |
763 } | |
764 } | |
765 | |
766 | |
767 int32_t* RegExpImpl::GlobalCache::FetchNext() { | |
768 current_match_index_++; | |
769 if (current_match_index_ >= num_matches_) { | |
770 // Current batch of results exhausted. | |
771 // Fail if last batch was not even fully filled. | |
772 if (num_matches_ < max_matches_) { | |
773 num_matches_ = 0; // Signal failed match. | |
774 return NULL; | |
775 } | |
776 | |
777 int32_t* last_match = &register_array_[register_array_size_ | |
778 - registers_per_match_]; | |
779 int last_end_index = last_match[1]; | |
780 | |
781 if (regexp_->TypeTag() == JSRegExp::ATOM) { | |
782 num_matches_ = RegExpImpl::AtomExecRaw(regexp_, subject_, last_end_index, | |
783 register_array_, register_array_size_); | |
784 } else { | |
785 int last_start_index = last_match[0]; | |
786 if (last_start_index == last_end_index) | |
787 last_end_index++; | |
788 if (last_end_index > subject_->length()) { | |
789 num_matches_ = 0; // Signal failed match. | |
790 return NULL; | |
791 } | |
792 num_matches_ = RegExpImpl::IrregexpExecRaw(regexp_, subject_, | |
793 last_end_index, register_array_, register_array_size_); | |
794 } | |
795 | |
796 if (num_matches_ <= 0) | |
797 return NULL; | |
798 current_match_index_ = 0; | |
799 return register_array_; | |
800 } else { | |
801 return &register_array_[current_match_index_ * registers_per_match_]; | |
802 } | |
803 } | |
804 | |
805 | |
806 int32_t* RegExpImpl::GlobalCache::LastSuccessfulMatch() { | |
807 int index = current_match_index_ * registers_per_match_; | |
808 if (num_matches_ == 0) { | |
809 // After a failed match we shift back by one result. | |
810 index -= registers_per_match_; | |
811 } | |
812 return &register_array_[index]; | |
813 } | |
814 | |
815 | |
669 // ------------------------------------------------------------------- | 816 // ------------------------------------------------------------------- |
670 // Implementation of the Irregexp regular expression engine. | 817 // Implementation of the Irregexp regular expression engine. |
671 // | 818 // |
672 // The Irregexp regular expression engine is intended to be a complete | 819 // The Irregexp regular expression engine is intended to be a complete |
673 // implementation of ECMAScript regular expressions. It generates either | 820 // implementation of ECMAScript regular expressions. It generates either |
674 // bytecodes or native code. | 821 // bytecodes or native code. |
675 | 822 |
676 // The Irregexp regexp engine is structured in three steps. | 823 // The Irregexp regexp engine is structured in three steps. |
677 // 1) The parser generates an abstract syntax tree. See ast.cc. | 824 // 1) The parser generates an abstract syntax tree. See ast.cc. |
678 // 2) From the AST a node network is created. The nodes are all | 825 // 2) From the AST a node network is created. The nodes are all |
(...skipping 5324 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
6003 } | 6150 } |
6004 | 6151 |
6005 return compiler.Assemble(&macro_assembler, | 6152 return compiler.Assemble(&macro_assembler, |
6006 node, | 6153 node, |
6007 data->capture_count, | 6154 data->capture_count, |
6008 pattern); | 6155 pattern); |
6009 } | 6156 } |
6010 | 6157 |
6011 | 6158 |
6012 }} // namespace v8::internal | 6159 }} // namespace v8::internal |
OLD | NEW |