Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1412)

Unified Diff: src/runtime.cc

Issue 10191001: Fast path for the regexp of bounded words. (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: Created 8 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/heap.h ('k') | test/mjsunit/string-replace-word-boundary.js » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/runtime.cc
diff --git a/src/runtime.cc b/src/runtime.cc
index 10de8325e562b09fda229d69210d346bc1196c06..d8210ff85c7560f6d698bb72af4b0471cecb2127 100644
--- a/src/runtime.cc
+++ b/src/runtime.cc
@@ -3766,6 +3766,85 @@ static bool SearchStringMultiple(Isolate* isolate,
}
+// Fast path for a global search for matches of /\b\w+\b/ in an ASCII subject.
+//
+// Scans the subject once, classifying every character as a word character
+// (\w, i.e. [0-9A-Z_a-z]) or a non-word character, and appends to |builder|:
+//  - a freshly allocated substring for every run of word characters, and
+//  - a subject slice for every run of non-word characters.
+//
+// If at least one word was found, the bounds of the last word are passed to
+// SetLastMatchInfoNoCaptures() together with |last_match_array| and
+// RE_SUCCESS is returned. Otherwise RE_FAILURE is returned; note that in that
+// case a non-empty subject has still been added to |builder| as one slice.
+static RegExpImpl::IrregexpResult SearchBoundedWords(
+    Isolate* isolate,
+    Handle<String> subject,
+    Handle<JSArray> last_match_array,
+    FixedArrayBuilder* builder) {
+  int word_start = 0;
+  int word_end = 0;
+  bool is_word_at_previous_pos = false;
+  int subject_length = subject->length();
+  String::FlatContent content = subject->GetFlatContent();
+  ASSERT(content.IsAscii());
+  // Raw pointer into the subject's characters. It has to be re-read after
+  // every step that may allocate (see below), because a garbage collection
+  // can move the underlying string.
+  const char* subject_chars = content.ToAsciiVector().start();
+
+  // This bitmap records, for each of the ASCII chars (0-127), whether it
+  // matches the regular expression \w (equivalent to [0-9A-Z_a-z]).
+  static const uint32_t bitmap[4] = {
+    0x00000000,  // ASCII 0-31
+    0x03FF0000,  // ASCII 32-63 : 0-9
+    0x87FFFFFE,  // ASCII 64-95 : A-Z and _
+    0x07FFFFFE   // ASCII 96-127 : a-z
+  };
+  static const char bitmap_block_shift = 5;
+  STATIC_ASSERT(1 << bitmap_block_shift == sizeof(bitmap[0]) * kBitsPerByte);
+  static const char bitmap_block_mask = (1 << bitmap_block_shift) - 1;
+
+  for (int current_pos = 0; current_pos < subject_length; current_pos++) {
+    // Read the character as unsigned: with a signed char, a byte >= 0x80
+    // would turn into a negative bitmap index.
+    const unsigned char c =
+        static_cast<unsigned char>(subject_chars[current_pos]);
+    // Look up the character in one of the four bitmap blocks. Codes outside
+    // the 128 entries covered by the bitmap are never word characters.
+    bool is_word_at_current_pos =
+        c < 128 &&
+        ((bitmap[c >> bitmap_block_shift] >> (c & bitmap_block_mask)) & 1) != 0;
+    if (is_word_at_current_pos != is_word_at_previous_pos) {
+      if (is_word_at_current_pos) {
+        // Word boundary at word start.
+        word_start = current_pos;
+        if (word_start != 0) {
+          // Add subject slice between last word (or the start of the subject)
+          // and the current word.
+          ReplacementStringBuilder::AddSubjectSlice(builder,
+                                                    word_end,
+                                                    word_start);
+        }
+      } else {
+        // Reserve capacity for this entry and for the following subject slice.
+        STATIC_ASSERT(kMaxBuilderEntriesPerRegExpMatch >= 3);
+        builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
+        // Word boundary at word end. Capture word.
+        word_end = current_pos;
+        {
+          HandleScope scope(isolate);
+          builder->Add(*isolate->factory()->NewSubString(subject,
+                                                         word_start,
+                                                         word_end));
+        }
+        // Growing the builder and creating the substring may both allocate
+        // and thereby trigger a GC that moves the subject, so the raw
+        // character pointer must be fetched again before the next read.
+        subject_chars = subject->GetFlatContent().ToAsciiVector().start();
+      }
+      is_word_at_previous_pos = is_word_at_current_pos;
+    }
+  }
+
+  // Handle last piece: capture last word or add subject slice for non-word.
+  // |subject_chars| is not read past this point, so no re-fetch is needed.
+  if (is_word_at_previous_pos) {
+    builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
+    HandleScope scope(isolate);
+    builder->Add(*isolate->factory()->NewSubString(subject,
+                                                   word_start,
+                                                   subject_length));
+    word_end = subject_length;
+  } else if (word_end < subject_length) {
+    ReplacementStringBuilder::AddSubjectSlice(builder,
+                                              word_end,
+                                              subject_length);
+  }
+
+  // |word_end| only ever becomes non-zero at the end of a word (a word is at
+  // least one character long), so it doubles as the "found a match" flag.
+  const bool found_word = word_end != 0;
+  if (!found_word) return RegExpImpl::RE_FAILURE;
+
+  SetLastMatchInfoNoCaptures(subject, last_match_array, word_start, word_end);
+  return RegExpImpl::RE_SUCCESS;
+}
+
+
static RegExpImpl::IrregexpResult SearchRegExpNoCaptureMultiple(
Isolate* isolate,
Handle<String> subject,
@@ -3773,6 +3852,13 @@ static RegExpImpl::IrregexpResult SearchRegExpNoCaptureMultiple(
Handle<JSArray> last_match_array,
FixedArrayBuilder* builder) {
ASSERT(subject->IsFlat());
+
+ if (subject->IsAsciiRepresentationUnderneath() &&
+ regexp->Pattern()->Equals(
+ isolate->heap()->bounded_word_regexp_symbol())) {
+ return SearchBoundedWords(isolate, subject, last_match_array, builder);
+ }
+
int match_start = -1;
int match_end = 0;
int pos = 0;
« no previous file with comments | « src/heap.h ('k') | test/mjsunit/string-replace-word-boundary.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698