Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(246)

Side by Side Diff: src/runtime.cc

Issue 10191001: Fast path for the regexp of bounded words. (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: Created 8 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/heap.h ('k') | test/mjsunit/string-replace-word-boundary.js » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 3748 matching lines...) Expand 10 before | Expand all | Expand 10 after
3759 SetLastMatchInfoNoCaptures(subject, 3759 SetLastMatchInfoNoCaptures(subject,
3760 last_match_info, 3760 last_match_info,
3761 match_pos, 3761 match_pos,
3762 match_pos + pattern->length()); 3762 match_pos + pattern->length());
3763 return true; 3763 return true;
3764 } 3764 }
3765 return false; // No matches at all. 3765 return false; // No matches at all.
3766 } 3766 }
3767 3767
3768 3768
3769 // Global search for matches to /\b\w+\b/ in an ASCII subject.
3770 static RegExpImpl::IrregexpResult SearchBoundedWords(
3771 Isolate* isolate,
3772 Handle<String> subject,
3773 Handle<JSArray> last_match_array,
3774 FixedArrayBuilder* builder) {
3775 int word_start = 0;
3776 int word_end = 0;
3777 bool is_word_at_previous_pos = false;
3778 int subject_length = subject->length();
3779 String::FlatContent content = subject->GetFlatContent();
3780 ASSERT(content.IsAscii());
3781 const char* subject_chars = content.ToAsciiVector().start();
3782
3783 // This bitmap corresponds to whether each of the ASCII chars (0-127) match
3784 // to the regular expression \w (equivalent to [0-9A-Z_a-z]).
3785 static const uint32_t bitmap[4] = {
3786 0x00000000, // ASCII 0-31
3787 0x03FF0000, // ASCII 32-63 : 0-9
3788 0x87FFFFFE, // ASCII 64-95 : A-Z and _
3789 0x07FFFFFE // ASCII 96-127 : a-z
3790 };
3791 static const char bitmap_block_shift = 5;
3792 STATIC_ASSERT(1 << bitmap_block_shift == sizeof(bitmap[0]) * kBitsPerByte);
3793 static const char bitmap_block_mask = (1 << bitmap_block_shift) - 1;
3794
3795 for (int current_pos = 0; current_pos < subject_length; current_pos++) {
3796 char c = subject_chars[current_pos];
3797 // Lookup character in one of the four bitmap blocks.
3798 bool is_word_at_current_pos =
3799 (bitmap[c >> bitmap_block_shift] >> (c & bitmap_block_mask)) & 1;
3800 if (is_word_at_current_pos != is_word_at_previous_pos) {
3801 if (is_word_at_current_pos) {
3802 // Word boundary at word start.
3803 word_start = current_pos;
3804 if (word_start != 0) {
3805 // Add subject slice between last word and current word.
3806 ReplacementStringBuilder::AddSubjectSlice(builder,
3807 word_end,
3808 word_start);
3809 }
3810 } else {
3811 // Reserve capacity for this entry and for the following subject slice.
3812 STATIC_ASSERT(kMaxBuilderEntriesPerRegExpMatch >= 3);
3813 builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
3814 // Word boundary at word end. Capture word.
3815 word_end = current_pos;
3816 HandleScope scope(isolate);
3817 builder->Add(*isolate->factory()->NewSubString(subject,
3818 word_start,
3819 word_end));
3820 }
3821 is_word_at_previous_pos = is_word_at_current_pos;
3822 }
3823 }
3824
3825 // Handle last piece: capture last word or add subject slice for non-word.
3826 if (is_word_at_previous_pos) {
3827 builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
3828 HandleScope scope(isolate);
3829 builder->Add(*isolate->factory()->NewSubString(subject,
3830 word_start,
3831 subject_length));
3832 word_end = subject_length;
3833 } else if (word_end < subject_length) {
3834 ReplacementStringBuilder::AddSubjectSlice(builder,
3835 word_end,
3836 subject_length);
3837 }
3838
3839 if (word_end !=0 || is_word_at_previous_pos) {
3840 SetLastMatchInfoNoCaptures(subject, last_match_array, word_start, word_end);
3841 return RegExpImpl::RE_SUCCESS;
3842 } else {
3843 return RegExpImpl::RE_FAILURE;
3844 }
3845 }
3846
3847
3769 static RegExpImpl::IrregexpResult SearchRegExpNoCaptureMultiple( 3848 static RegExpImpl::IrregexpResult SearchRegExpNoCaptureMultiple(
3770 Isolate* isolate, 3849 Isolate* isolate,
3771 Handle<String> subject, 3850 Handle<String> subject,
3772 Handle<JSRegExp> regexp, 3851 Handle<JSRegExp> regexp,
3773 Handle<JSArray> last_match_array, 3852 Handle<JSArray> last_match_array,
3774 FixedArrayBuilder* builder) { 3853 FixedArrayBuilder* builder) {
3775 ASSERT(subject->IsFlat()); 3854 ASSERT(subject->IsFlat());
3855
3856 if (subject->IsAsciiRepresentationUnderneath() &&
3857 regexp->Pattern()->Equals(
3858 isolate->heap()->bounded_word_regexp_symbol())) {
3859 return SearchBoundedWords(isolate, subject, last_match_array, builder);
3860 }
3861
3776 int match_start = -1; 3862 int match_start = -1;
3777 int match_end = 0; 3863 int match_end = 0;
3778 int pos = 0; 3864 int pos = 0;
3779 int required_registers = RegExpImpl::IrregexpPrepare(regexp, subject); 3865 int required_registers = RegExpImpl::IrregexpPrepare(regexp, subject);
3780 if (required_registers < 0) return RegExpImpl::RE_EXCEPTION; 3866 if (required_registers < 0) return RegExpImpl::RE_EXCEPTION;
3781 3867
3782 OffsetsVector registers(required_registers, isolate); 3868 OffsetsVector registers(required_registers, isolate);
3783 Vector<int32_t> register_vector(registers.vector(), registers.length()); 3869 Vector<int32_t> register_vector(registers.vector(), registers.length());
3784 int subject_length = subject->length(); 3870 int subject_length = subject->length();
3785 bool first = true; 3871 bool first = true;
(...skipping 9622 matching lines...) Expand 10 before | Expand all | Expand 10 after
13408 // Handle last resort GC and make sure to allow future allocations 13494 // Handle last resort GC and make sure to allow future allocations
13409 // to grow the heap without causing GCs (if possible). 13495 // to grow the heap without causing GCs (if possible).
13410 isolate->counters()->gc_last_resort_from_js()->Increment(); 13496 isolate->counters()->gc_last_resort_from_js()->Increment();
13411 isolate->heap()->CollectAllGarbage(Heap::kNoGCFlags, 13497 isolate->heap()->CollectAllGarbage(Heap::kNoGCFlags,
13412 "Runtime::PerformGC"); 13498 "Runtime::PerformGC");
13413 } 13499 }
13414 } 13500 }
13415 13501
13416 13502
13417 } } // namespace v8::internal 13503 } } // namespace v8::internal
OLDNEW
« no previous file with comments | « src/heap.h ('k') | test/mjsunit/string-replace-word-boundary.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698