| OLD | NEW |
| 1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 3748 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3759 SetLastMatchInfoNoCaptures(subject, | 3759 SetLastMatchInfoNoCaptures(subject, |
| 3760 last_match_info, | 3760 last_match_info, |
| 3761 match_pos, | 3761 match_pos, |
| 3762 match_pos + pattern->length()); | 3762 match_pos + pattern->length()); |
| 3763 return true; | 3763 return true; |
| 3764 } | 3764 } |
| 3765 return false; // No matches at all. | 3765 return false; // No matches at all. |
| 3766 } | 3766 } |
| 3767 | 3767 |
| 3768 | 3768 |
| 3769 // Global search for matches to /\b\w+\b/ in an ASCII subject. |
| 3770 static RegExpImpl::IrregexpResult SearchBoundedWords( |
| 3771 Isolate* isolate, |
| 3772 Handle<String> subject, |
| 3773 Handle<JSArray> last_match_array, |
| 3774 FixedArrayBuilder* builder) { |
| 3775 int word_start = 0; |
| 3776 int word_end = 0; |
| 3777 bool is_word_at_previous_pos = false; |
| 3778 int subject_length = subject->length(); |
| 3779 String::FlatContent content = subject->GetFlatContent(); |
| 3780 ASSERT(content.IsAscii()); |
| 3781 const char* subject_chars = content.ToAsciiVector().start(); |
| 3782 |
| 3783 // This bitmap corresponds to whether each of the ASCII chars (0-127) match |
| 3784 // to the regular expression \w (equivalent to [0-9A-Z_a-z]). |
| 3785 static const uint32_t bitmap[4] = { |
| 3786 0x00000000, // ASCII 0-31 |
| 3787 0x03FF0000, // ASCII 32-63 : 0-9 |
| 3788 0x87FFFFFE, // ASCII 64-95 : A-Z and _ |
| 3789 0x07FFFFFE // ASCII 96-127 : a-z |
| 3790 }; |
| 3791 static const char bitmap_block_shift = 5; |
| 3792 STATIC_ASSERT(1 << bitmap_block_shift == sizeof(bitmap[0]) * kBitsPerByte); |
| 3793 static const char bitmap_block_mask = (1 << bitmap_block_shift) - 1; |
| 3794 |
| 3795 for (int current_pos = 0; current_pos < subject_length; current_pos++) { |
| 3796 char c = subject_chars[current_pos]; |
| 3797 // Lookup character in one of the four bitmap blocks. |
| 3798 bool is_word_at_current_pos = |
| 3799 (bitmap[c >> bitmap_block_shift] >> (c & bitmap_block_mask)) & 1; |
| 3800 if (is_word_at_current_pos != is_word_at_previous_pos) { |
| 3801 if (is_word_at_current_pos) { |
| 3802 // Word boundary at word start. |
| 3803 word_start = current_pos; |
| 3804 if (word_start != 0) { |
| 3805 // Add subject slice between last word and current word. |
| 3806 ReplacementStringBuilder::AddSubjectSlice(builder, |
| 3807 word_end, |
| 3808 word_start); |
| 3809 } |
| 3810 } else { |
| 3811 // Reserve capacity for this entry and for the following subject slice. |
| 3812 STATIC_ASSERT(kMaxBuilderEntriesPerRegExpMatch >= 3); |
| 3813 builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch); |
| 3814 // Word boundary at word end. Capture word. |
| 3815 word_end = current_pos; |
| 3816 HandleScope scope(isolate); |
| 3817 builder->Add(*isolate->factory()->NewSubString(subject, |
| 3818 word_start, |
| 3819 word_end)); |
| 3820 } |
| 3821 is_word_at_previous_pos = is_word_at_current_pos; |
| 3822 } |
| 3823 } |
| 3824 |
| 3825 // Handle last piece: capture last word or add subject slice for non-word. |
| 3826 if (is_word_at_previous_pos) { |
| 3827 builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch); |
| 3828 HandleScope scope(isolate); |
| 3829 builder->Add(*isolate->factory()->NewSubString(subject, |
| 3830 word_start, |
| 3831 subject_length)); |
| 3832 word_end = subject_length; |
| 3833 } else if (word_end < subject_length) { |
| 3834 ReplacementStringBuilder::AddSubjectSlice(builder, |
| 3835 word_end, |
| 3836 subject_length); |
| 3837 } |
| 3838 |
| 3839 if (word_end !=0 || is_word_at_previous_pos) { |
| 3840 SetLastMatchInfoNoCaptures(subject, last_match_array, word_start, word_end); |
| 3841 return RegExpImpl::RE_SUCCESS; |
| 3842 } else { |
| 3843 return RegExpImpl::RE_FAILURE; |
| 3844 } |
| 3845 } |
| 3846 |
| 3847 |
| 3769 static RegExpImpl::IrregexpResult SearchRegExpNoCaptureMultiple( | 3848 static RegExpImpl::IrregexpResult SearchRegExpNoCaptureMultiple( |
| 3770 Isolate* isolate, | 3849 Isolate* isolate, |
| 3771 Handle<String> subject, | 3850 Handle<String> subject, |
| 3772 Handle<JSRegExp> regexp, | 3851 Handle<JSRegExp> regexp, |
| 3773 Handle<JSArray> last_match_array, | 3852 Handle<JSArray> last_match_array, |
| 3774 FixedArrayBuilder* builder) { | 3853 FixedArrayBuilder* builder) { |
| 3775 ASSERT(subject->IsFlat()); | 3854 ASSERT(subject->IsFlat()); |
| 3855 |
| 3856 if (subject->IsAsciiRepresentationUnderneath() && |
| 3857 regexp->Pattern()->Equals( |
| 3858 isolate->heap()->bounded_word_regexp_symbol())) { |
| 3859 return SearchBoundedWords(isolate, subject, last_match_array, builder); |
| 3860 } |
| 3861 |
| 3776 int match_start = -1; | 3862 int match_start = -1; |
| 3777 int match_end = 0; | 3863 int match_end = 0; |
| 3778 int pos = 0; | 3864 int pos = 0; |
| 3779 int required_registers = RegExpImpl::IrregexpPrepare(regexp, subject); | 3865 int required_registers = RegExpImpl::IrregexpPrepare(regexp, subject); |
| 3780 if (required_registers < 0) return RegExpImpl::RE_EXCEPTION; | 3866 if (required_registers < 0) return RegExpImpl::RE_EXCEPTION; |
| 3781 | 3867 |
| 3782 OffsetsVector registers(required_registers, isolate); | 3868 OffsetsVector registers(required_registers, isolate); |
| 3783 Vector<int32_t> register_vector(registers.vector(), registers.length()); | 3869 Vector<int32_t> register_vector(registers.vector(), registers.length()); |
| 3784 int subject_length = subject->length(); | 3870 int subject_length = subject->length(); |
| 3785 bool first = true; | 3871 bool first = true; |
| (...skipping 9622 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 13408 // Handle last resort GC and make sure to allow future allocations | 13494 // Handle last resort GC and make sure to allow future allocations |
| 13409 // to grow the heap without causing GCs (if possible). | 13495 // to grow the heap without causing GCs (if possible). |
| 13410 isolate->counters()->gc_last_resort_from_js()->Increment(); | 13496 isolate->counters()->gc_last_resort_from_js()->Increment(); |
| 13411 isolate->heap()->CollectAllGarbage(Heap::kNoGCFlags, | 13497 isolate->heap()->CollectAllGarbage(Heap::kNoGCFlags, |
| 13412 "Runtime::PerformGC"); | 13498 "Runtime::PerformGC"); |
| 13413 } | 13499 } |
| 13414 } | 13500 } |
| 13415 | 13501 |
| 13416 | 13502 |
| 13417 } } // namespace v8::internal | 13503 } } // namespace v8::internal |
| OLD | NEW |