| Index: src/runtime.cc
|
| diff --git a/src/runtime.cc b/src/runtime.cc
|
| index 10de8325e562b09fda229d69210d346bc1196c06..d8210ff85c7560f6d698bb72af4b0471cecb2127 100644
|
| --- a/src/runtime.cc
|
| +++ b/src/runtime.cc
|
| @@ -3766,6 +3766,85 @@ static bool SearchStringMultiple(Isolate* isolate,
|
| }
|
|
|
|
|
| +// Fast path for a global search for /\b\w+\b/ in a flat ASCII subject. Adds
|
| +// each word (as a substring) and each gap (as a subject slice) to |builder|.
|
| +// Returns RE_FAILURE if no word is found; otherwise sets the last match info.
|
| +static RegExpImpl::IrregexpResult SearchBoundedWords(
|
| +    Isolate* isolate,
|
| +    Handle<String> subject,
|
| +    Handle<JSArray> last_match_array,
|
| +    FixedArrayBuilder* builder) {
|
| +  int word_start = 0;
|
| +  int word_end = 0;
|
| +  bool is_word_at_previous_pos = false;
|
| +  int subject_length = subject->length();
|
| +  String::FlatContent content = subject->GetFlatContent();
|
| +  ASSERT(content.IsAscii());
|
| +  // Raw pointer into the heap: must be re-read after anything that allocates.
|
| +  const char* subject_chars = content.ToAsciiVector().start();
|
| +
|
| +  // Bit i of the table is set iff char code i matches \w ([0-9A-Z_a-z]).
|
| +  // Codes 128-255 are covered too (never \w) so any byte indexes in bounds.
|
| +  static const uint32_t bitmap[8] = {
|
| +    0x00000000,  // ASCII 0-31
|
| +    0x03FF0000,  // ASCII 32-63  : 0-9
|
| +    0x87FFFFFE,  // ASCII 64-95  : A-Z and _
|
| +    0x07FFFFFE,  // ASCII 96-127 : a-z
|
| +    0, 0, 0, 0   // 128-255      : no word characters
|
| +  };
|
| +  static const char bitmap_block_shift = 5;
|
| +  STATIC_ASSERT(1 << bitmap_block_shift == sizeof(bitmap[0]) * kBitsPerByte);
|
| +  static const char bitmap_block_mask = (1 << bitmap_block_shift) - 1;
|
| +
|
| +  for (int current_pos = 0; current_pos < subject_length; current_pos++) {
|
| +    // Unsigned, so that a byte >= 0x80 cannot yield a negative table index.
|
| +    unsigned char c = static_cast<unsigned char>(subject_chars[current_pos]);
|
| +    bool is_word_at_current_pos =
|
| +        (bitmap[c >> bitmap_block_shift] >> (c & bitmap_block_mask)) & 1;
|
| +    if (is_word_at_current_pos != is_word_at_previous_pos) {
|
| +      if (is_word_at_current_pos) {
|
| +        // Word boundary at word start.
|
| +        word_start = current_pos;
|
| +        if (word_start != 0) {
|
| +          // Add subject slice between last word and current word.
|
| +          ReplacementStringBuilder::AddSubjectSlice(
|
| +              builder, word_end, word_start);
|
| +        }
|
| +      } else {
|
| +        // Reserve capacity for this entry and for the following subject slice.
|
| +        STATIC_ASSERT(kMaxBuilderEntriesPerRegExpMatch >= 3);
|
| +        builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
|
| +        // Word boundary at word end. Capture word.
|
| +        word_end = current_pos;
|
| +        HandleScope scope(isolate);
|
| +        builder->Add(
|
| +            *isolate->factory()->NewSubString(subject, word_start, word_end));
|
| +        // The allocations above may trigger a GC that moves the subject;
|
| +        // refresh the raw character pointer before the next read.
|
| +        subject_chars = subject->GetFlatContent().ToAsciiVector().start();
|
| +      }
|
| +      is_word_at_previous_pos = is_word_at_current_pos;
|
| +    }
|
| +  }
|
| +
|
| +  // Handle last piece: capture last word or add subject slice for non-word.
|
| +  if (is_word_at_previous_pos) {
|
| +    builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
|
| +    HandleScope scope(isolate);
|
| +    builder->Add(
|
| +        *isolate->factory()->NewSubString(subject, word_start, subject_length));
|
| +    word_end = subject_length;
|
| +  } else if (word_end < subject_length) {
|
| +    ReplacementStringBuilder::AddSubjectSlice(
|
| +        builder, word_end, subject_length);
|
| +  }
|
| +
|
| +  if (word_end == 0) return RegExpImpl::RE_FAILURE;  // No word was found.
|
| +  SetLastMatchInfoNoCaptures(subject, last_match_array, word_start, word_end);
|
| +  return RegExpImpl::RE_SUCCESS;
|
| +}
|
| +
|
| +
|
| static RegExpImpl::IrregexpResult SearchRegExpNoCaptureMultiple(
|
| Isolate* isolate,
|
| Handle<String> subject,
|
| @@ -3773,6 +3852,13 @@ static RegExpImpl::IrregexpResult SearchRegExpNoCaptureMultiple(
|
| Handle<JSArray> last_match_array,
|
| FixedArrayBuilder* builder) {
|
| ASSERT(subject->IsFlat());
|
| +
|
| + if (subject->IsAsciiRepresentationUnderneath() &&
|
| + regexp->Pattern()->Equals(
|
| + isolate->heap()->bounded_word_regexp_symbol())) {
|
| + return SearchBoundedWords(isolate, subject, last_match_array, builder);
|
| + }
|
| +
|
| int match_start = -1;
|
| int match_end = 0;
|
| int pos = 0;
|
|
|