Chromium Code Reviews| Index: src/runtime.cc |
| diff --git a/src/runtime.cc b/src/runtime.cc |
| index bd81704df05b98a68e1c7cbe4f99c567a3411abc..0144b122c75e2cb2c643250cc3e5ae573a6d2f5c 100644 |
| --- a/src/runtime.cc |
| +++ b/src/runtime.cc |
| @@ -1180,6 +1180,336 @@ static Object* Runtime_CharFromCode(Arguments args) { |
| return Heap::empty_string(); |
| } |
| +class ReplacementStringBuilder { |
|
Erik Corry
2009/03/12 10:13:00
I think you should use StringBuilderConcatHelper i
Lasse Reichstein
2009/03/13 08:36:51
Done.
|
| + public: |
| + ReplacementStringBuilder() : string_() {} |
| + |
| + void Add(Handle<String> string) { |
| + StringShape shape(*string); |
| + if (string->length(shape) > 0) { |
| + if (string_.is_null()) { |
| + string_ = string; |
| + } else { |
| + string_ = |
| + Factory::NewConsString(string_, StringShape(*string_), |
| + string, shape); |
| + } |
| + } |
| + } |
| + |
| + Handle<String> ToString() { |
| + if (string_.is_null()) { |
| + return Factory::empty_string(); |
| + } |
| + return string_; |
| + } |
| + private: |
| + Handle<String> string_; |
| +}; |
| + |
| + |
| +class CompiledReplacement { |
| + public: |
| + CompiledReplacement(Handle<String> replacement, int capture_count) |
| + : parts_(1), replacement_substrings_(0) { |
| + ParseReplacement(replacement, capture_count); |
|
Erik Corry
2009/03/12 10:13:00
Nontrivial work shouldn't be in constructors.
Lasse Reichstein
2009/03/13 08:36:51
Done.
|
| + } |
| + void Apply(ReplacementStringBuilder* builder, |
| + int match_from, |
| + int match_to, |
| + Handle<String> subject, |
| + Handle<JSArray> last_match_info) { |
| + for (int i = 0, n = parts_.length(); i < n; i++) { |
| + ReplacementPart part = parts_[i]; |
| + switch (part.tag) { |
| + case SUBJECT_MATCH: |
| + builder->Add(Factory::NewStringSlice(subject, match_from, match_to)); |
| + break; |
| + case SUBJECT_PREFIX: |
| + builder->Add(Factory::NewStringSlice(subject, 0, match_from)); |
| + break; |
| + case SUBJECT_SUFFIX: { |
| + int length = subject->length(); |
| + builder->Add(Factory::NewStringSlice(subject, match_to, length)); |
| + break; |
| + } |
| + case SUBJECT_CAPTURE: { |
| + int capture = part.data; |
| + FixedArray* match_info = last_match_info->elements(); |
|
Erik Corry
2009/03/12 10:13:00
Move outside loop? Actually, you should move this
Lasse Reichstein
2009/03/13 08:36:51
As commented later, it can't be moved that far out
|
| + int from = RegExpImpl::GetCapture(match_info, capture * 2); |
| + int to = RegExpImpl::GetCapture(match_info, capture * 2 + 1); |
| + builder->Add(Factory::NewStringSlice(subject, from, to)); |
| + break; |
| + } |
| + case REPLACEMENT_SUBSTRING: |
| + builder->Add(replacement_substrings_[part.data]); |
| + break; |
| + default: |
| + UNREACHABLE(); |
| + } |
| + } |
| + } |
| + private: |
| + enum PartType { |
| + SUBJECT_MATCH = -1, |
|
Erik Corry
2009/03/12 10:13:00
Subject match seems like a special case of subject
Lasse Reichstein
2009/03/13 08:36:51
True. Slightly faster to implement since I already
|
| + SUBJECT_PREFIX = -2, |
| + SUBJECT_SUFFIX = -3, |
| + SUBJECT_CAPTURE = -4, |
| + REPLACEMENT_SUBSTRING = -5 |
|
Erik Corry
2009/03/12 10:13:00
I'd prefer these to be positive and the offsets ne
Lasse Reichstein
2009/03/13 08:36:51
Done.
|
| + }; |
| + |
| + struct ReplacementPart { |
| + // If tag >= 0 then it is the start index of a substring of the replacement |
| + // pattern. |
| + ReplacementPart(int tag, int data) |
| + : tag(tag), data(data) {} |
| + int tag; |
| + int data; |
|
Erik Corry
2009/03/12 10:13:00
'data' is too generic a name, there's no comment t
Lasse Reichstein
2009/03/13 08:36:51
Comments added.
|
| + }; |
| + |
| + template<typename Char> |
| + static void Parse(ZoneList<ReplacementPart>* parts, |
| + Vector<Char> characters, |
| + int capture_count) { |
| + int length = characters.length(); |
| + int last = 0; |
| + for (int i = 0; i < length; i++) { |
| + Char c = characters[i]; |
| + if (c == '$') { |
| + int next_index = i + 1; |
| + if (next_index == length) { // No next character! |
| + break; |
| + } |
| + Char c2 = characters[next_index]; |
| + switch (c2) { |
| + case '$': |
| + if (i > last) { |
| + // Up to and including the first "$". |
| + parts->Add(ReplacementPart(last, next_index)); |
| + last = next_index + 1; // Continue after the second "$". |
| + } else { |
| + last = next_index; // Continue with the second "$". |
| + } |
| + i = next_index; |
| + break; |
| + case '`': |
| + if (i > last) { |
| + parts->Add(ReplacementPart(last, i)); |
|
Erik Corry
2009/03/12 10:13:00
Perhaps parts->Add could check for last == i to sa
Lasse Reichstein
2009/03/13 08:36:51
Parts is just a ZoneList, and this is a self-conta
|
| + } |
| + parts->Add(ReplacementPart(SUBJECT_PREFIX, 0)); |
| + i = next_index; |
| + last = i + 1; |
| + break; |
| + case '\'': |
| + if (i > last) { |
| + parts->Add(ReplacementPart(last, i)); |
| + } |
| + parts->Add(ReplacementPart(SUBJECT_SUFFIX, 0)); |
| + i = next_index; |
| + last = i + 1; |
| + break; |
| + case '&': |
| + if (i > last) { |
| + parts->Add(ReplacementPart(last, i)); |
| + } |
| + parts->Add(ReplacementPart(SUBJECT_MATCH, 0)); |
| + i = next_index; |
| + last = i + 1; |
| + break; |
| + case '0': |
|
Erik Corry
2009/03/12 10:13:00
Are $0 and $04 allowed? Should $0 be taken verbat
Lasse Reichstein
2009/03/13 08:36:51
Yes and yes.
|
| + case '1': |
| + case '2': |
| + case '3': |
| + case '4': |
| + case '5': |
| + case '6': |
| + case '7': |
| + case '8': |
| + case '9': { |
| + int capture_ref = c2 - '0'; |
| + if (capture_ref > capture_count) { |
| + i = next_index; |
| + continue; |
| + } |
| + int second_digit_index = next_index + 1; |
| + if (second_digit_index < length) { |
| + // Peek ahead to see if we have two digits. |
| + Char c3 = characters[second_digit_index]; |
| + if ('0' <= c3 && c3 <= '9') { // Double digits. |
| + int double_digit_ref = capture_ref * 10 + c3 - '0'; |
| + if (double_digit_ref <= capture_count) { |
| + next_index = second_digit_index; |
| + capture_ref = double_digit_ref; |
| + } |
| + } |
| + } |
| + if (capture_ref > 0) { |
| + if (i > last) { |
| + parts->Add(ReplacementPart(last, i)); |
| + } |
| + parts->Add(ReplacementPart(SUBJECT_CAPTURE, capture_ref)); |
| + last = next_index + 1; |
| + } |
| + i = next_index; |
| + break; |
| + } |
| + default: |
|
Erik Corry
2009/03/12 10:13:00
Should $F00 replace with $FOO or FOO?
Lasse Reichstein
2009/03/13 08:36:51
With "$F00".
|
| + i = next_index; |
| + break; |
| + } |
| + } |
| + } |
| + if (length > last) { |
| + parts->Add(ReplacementPart(last, length)); |
| + } |
| + } |
| + |
| + void ParseReplacement(Handle<String> replacement, int capture_count) { |
| + StringShape shape(*replacement); |
| + ASSERT(replacement->IsFlat(shape)); |
| + if (shape.IsAsciiRepresentation()) { |
| + Parse(&parts_, replacement->ToAsciiVector(), capture_count); |
| + } else { |
| + ASSERT(shape.IsTwoByteRepresentation()); |
| + Parse(&parts_, replacement->ToUC16Vector(), capture_count); |
| + } |
| + // Find substrings of replacement string and create them as String objcts.. |
| + int index = 0; |
| + for (int i = 0, n = parts_.length(); i < n; i++) { |
| + int tag = parts_[i].tag; |
| + if (tag >= 0) { // A replacement string slice. |
| + int end = parts_[i].data; |
| + replacement_substrings_.Add(Factory::NewStringSlice(replacement, |
| + tag, |
| + end)); |
| + parts_[i].tag = REPLACEMENT_SUBSTRING; |
| + parts_[i].data = index; |
| + index++; |
| + } |
| + } |
| + } |
| + |
| + ZoneList<ReplacementPart> parts_; |
| + ZoneList<Handle<String> > replacement_substrings_; |
| +}; |
| + |
| + |
| +static Object* StringReplaceRegExpWithString(String* subject, |
| + JSRegExp* regexp, |
| + String* replacement, |
| + JSArray* last_match_info) { |
| + ASSERT(subject->IsFlat(StringShape(subject))); |
| + ASSERT(replacement->IsFlat(StringShape(replacement))); |
| + |
| + HandleScope handles; |
| + |
| + int length = subject->length(); |
| + Handle<String> subject_handle(subject); |
| + Handle<JSRegExp> regexp_handle(regexp); |
| + Handle<String> replacement_handle(replacement); |
| + Handle<JSArray> last_match_info_handle(last_match_info); |
| + Handle<Object> match = RegExpImpl::Exec(regexp_handle, |
| + subject_handle, |
| + 0, |
| + last_match_info_handle); |
| + if (match.is_null()) { |
| + return Failure::Exception(); |
| + } |
| + if (match->IsNull()) { |
| + return *subject_handle; |
| + } |
| + |
| + int capture_count = regexp_handle->CaptureCount(); |
| + |
| + // CompiledReplacement uses zone allocation. |
| + ZoneScope zone(DELETE_ON_EXIT); |
| + CompiledReplacement compiled_replacement(replacement_handle, capture_count); |
| + ReplacementStringBuilder builder; |
| + bool is_global = regexp_handle->GetFlags().is_global(); |
| + |
| + // Index of end of last match. |
| + int prev = 0; |
| + |
| + do { |
| + ASSERT(last_match_info_handle->HasFastElements()); |
| + FixedArray* match_data = last_match_info_handle->elements(); |
|
Erik Corry
2009/03/12 10:13:00
Not using a handle is a little dodgy. If you move
Lasse Reichstein
2009/03/13 08:36:51
Won't work. The loop might be preempted (since it
Erik Corry
2009/03/13 09:12:34
If you retain a reference to the old FixedArray th
|
| + ASSERT_EQ(capture_count * 2 + 2, |
| + Smi::cast( |
| + match_data->get(RegExpImpl::kLastCaptureCount))->value()); |
| + |
| + int start = RegExpImpl::GetCapture(match_data, 0); |
| + int end = RegExpImpl::GetCapture(match_data, 1); |
| + |
| + if (prev < start) { |
| + builder.Add(Factory::NewStringSlice(subject_handle, prev, start)); |
| + } |
| + compiled_replacement.Apply(&builder, |
| + start, |
| + end, |
| + subject_handle, |
| + last_match_info_handle); |
| + prev = end; |
| + |
| + // Only continue checking for global regexps. |
| + if (!is_global) break; |
| + |
| + // Continue from where the match ended, unless it was an empty match. |
| + int next = end; |
| + if (start == end) { |
| + next = end + 1; |
| + if (next > length) break; |
| + } |
| + |
| + match = RegExpImpl::Exec(regexp_handle, |
| + subject_handle, |
| + next, |
| + last_match_info_handle); |
| + if (match.is_null()) { |
| + return Failure::Exception(); |
| + } |
| + } while (!match->IsNull()); |
| + |
| + if (prev < length) { |
| + builder.Add(Factory::NewStringSlice(subject_handle, prev, length)); |
| + } |
| + |
| + return *(builder.ToString()); |
| +} |
| + |
| + |
| +static Object* Runtime_StringReplaceRegExpWithString(Arguments args) { |
| + ASSERT(args.length() == 4); |
| + |
| + CONVERT_CHECKED(String, subject, args[0]); |
| + StringShape subject_shape(subject); |
| + if (!subject->IsFlat(subject_shape)) { |
| + Object* flat_subject = subject->TryFlatten(subject_shape); |
| + if (flat_subject->IsFailure()) { |
| + return flat_subject; |
| + } |
| + subject = String::cast(flat_subject); |
| + } |
| + |
| + CONVERT_CHECKED(String, replacement, args[2]); |
| + StringShape replacement_shape(replacement); |
| + if (!replacement->IsFlat(replacement_shape)) { |
| + Object* flat_replacement = replacement->TryFlatten(replacement_shape); |
| + if (flat_replacement->IsFailure()) { |
| + return flat_replacement; |
| + } |
| + replacement = String::cast(flat_replacement); |
| + } |
| + |
| + CONVERT_CHECKED(JSRegExp, regexp, args[1]); |
| + CONVERT_CHECKED(JSArray, last_match_info, args[3]); |
|
Erik Corry
2009/03/12 10:13:00
You need to check here that the last_match_info ha
Lasse Reichstein
2009/03/13 08:36:51
I only ever use last_match_info after having execu
|
| + |
| + return StringReplaceRegExpWithString(subject, |
| + regexp, |
| + replacement, |
| + last_match_info); |
| +} |
| + |
| + |
| // Cap on the maximal shift in the Boyer-Moore implementation. By setting a |
| // limit, we can fix the size of tables. |