Chromium Code Reviews| Index: base/strings/string_util.cc |
| diff --git a/base/strings/string_util.cc b/base/strings/string_util.cc |
| index 71ae894dd6e7559e1efa0a2623e6328353ce61d4..b44026fc051e04f2db6c92971759e43d2adc9ff0 100644 |
| --- a/base/strings/string_util.cc |
| +++ b/base/strings/string_util.cc |
| @@ -712,111 +712,132 @@ string16 FormatBytesUnlocalized(int64_t bytes) { |
| } |
| // Runs in O(n) time in the length of |str|. |
| -template<class StringType> |
| +template <class StringType> |
| void DoReplaceSubstringsAfterOffset(StringType* str, |
| - size_t offset, |
| + size_t initial_offset, |
| BasicStringPiece<StringType> find_this, |
| BasicStringPiece<StringType> replace_with, |
| bool replace_all) { |
| DCHECK(!find_this.empty()); |
| // If the find string doesn't appear, there's nothing to do. |
| - offset = str->find(find_this.data(), offset, find_this.size()); |
| - if (offset == StringType::npos) |
| + size_t first_match = |
| + str->find(find_this.data(), initial_offset, find_this.length()); |
| + if (first_match == StringType::npos) |
| return; |
| // If we're only replacing one instance, there's no need to do anything |
| // complicated. |
| - size_t find_length = find_this.length(); |
| + const size_t find_length = find_this.length(); |
| + const size_t replace_length = replace_with.length(); |
| if (!replace_all) { |
| - str->replace(offset, find_length, replace_with.data(), replace_with.size()); |
| + str->replace(first_match, find_length, replace_with.data(), replace_length); |
| return; |
| } |
| // If the find and replace strings are the same length, we can simply use |
| // replace() on each instance, and finish the entire operation in O(n) time. |
| - size_t replace_length = replace_with.length(); |
| if (find_length == replace_length) { |
| + size_t offset = first_match; |
|
Peter Kasting
2017/07/27 01:09:57
Nit: It does one unnecessary comparison at the sta
ncarter (slow)
2017/07/28 02:34:25
Done.
FWIW it looks like MSVC manages to omit the
|
| do { |
| - str->replace(offset, find_length, |
| - replace_with.data(), replace_with.size()); |
| + str->replace(offset, find_length, replace_with.data(), replace_length); |
| offset = str->find(find_this.data(), offset + replace_length, |
| - find_this.size()); |
| + find_this.length()); |
|
danakj
2017/07/27 15:55:30
nit: this could be written find_length?
ncarter (slow)
2017/07/28 02:34:25
Done.
|
| } while (offset != StringType::npos); |
| return; |
| } |
| // Since the find and replace strings aren't the same length, a loop like the |
| // one above would be O(n^2) in the worst case, as replace() will shift the |
| - // entire remaining string each time. We need to be more clever to keep |
| - // things O(n). |
| - // |
| - // If we're shortening the string, we can alternate replacements with shifting |
| - // forward the intervening characters using memmove(). |
| + // entire remaining string each time. We need to be more clever to keep things |
| + // O(n). |
|
Peter Kasting
2017/07/27 05:25:14
Nit: Somewhere, we need comments that say what the
ncarter (slow)
2017/07/28 02:34:25
Done.
|
| size_t str_length = str->length(); |
| - if (find_length > replace_length) { |
| - size_t write_offset = offset; |
| - do { |
| - if (replace_length) { |
| - str->replace(write_offset, replace_length, |
| - replace_with.data(), replace_with.size()); |
| - write_offset += replace_length; |
| - } |
| - size_t read_offset = offset + find_length; |
| - offset = std::min( |
| - str->find(find_this.data(), read_offset, find_this.size()), |
| - str_length); |
| - size_t length = offset - read_offset; |
| - if (length) { |
| - memmove(&(*str)[write_offset], &(*str)[read_offset], |
| - length * sizeof(typename StringType::value_type)); |
| - write_offset += length; |
| + size_t expansion = 0; |
| + if (replace_length > find_length) { |
| + // This operation lengthens the string; determine the new length by counting |
| + // matches. |
| + size_t num_matches = 0; |
| + for (size_t match = first_match; match != StringType::npos; |
|
danakj
2017/07/27 15:55:30
we should be consistent and either for() always li
ncarter (slow)
2017/07/28 02:34:24
Switched to a for loop above, based on the convers
|
| + match = str->find(find_this.data(), match + find_length, |
| + find_this.length())) { |
|
danakj
2017/07/27 15:55:30
nit: find_length?
ncarter (slow)
2017/07/28 02:34:25
Done.
|
| + num_matches++; |
|
Peter Kasting
2017/07/27 05:25:14
Nit: Totally personal style, but postincrements re
ncarter (slow)
2017/07/28 02:34:24
Done.
|
| + } |
| + expansion = (replace_length - find_length) * num_matches; |
|
danakj
2017/07/27 15:55:30
this is nitty, but as per peter's comment below, i
ncarter (slow)
2017/07/28 02:34:24
I've eliminated the imul instruction, by accumulat
|
| + const size_t final_length = str_length + expansion; |
| + |
| + if (str->capacity() < final_length) { |
|
ncarter (slow)
2017/07/26 23:50:14
Using a temporary seems to result in fewer copies
Peter Kasting
2017/07/27 01:09:57
I tried to optimize the original code to do as few
danakj
2017/07/27 15:55:30
I'd like to understand that we really want to use
danakj
2017/07/27 15:58:48
Oops, I meant memmove() here as it's written expli
Peter Kasting
2017/07/27 22:09:47
I don't think memmove() usually copies to a temp b
Peter Kasting
2017/07/27 22:11:21
More confirmation of this: see the answer (and com
ncarter (slow)
2017/07/28 02:34:25
I agree with danakj's intuition that a temp seems
Peter Kasting
2017/07/28 08:29:24
By 1, right? Because we replace "copy to back" wi
danakj
2017/07/28 15:53:48
In my profiling work in chromium, code has been co
ncarter (slow)
2017/07/28 21:46:34
No, by a difference of 2. In the worse case the wh
Peter Kasting
2017/07/29 02:18:01
To summarize your explanation: if we have to grow
ncarter (slow)
2017/07/31 18:55:39
Agree.
FWIW, I figured out the algorithm for the
|
| + // Since we'd have to realloc the string anyway, use a temporary to build |
| + // the result. |
| + StringType src; |
| + str->swap(src); |
| + str->reserve(final_length); |
|
ncarter (slow)
2017/07/26 23:50:14
Should we worry that using a temporary here will b
Peter Kasting
2017/07/27 01:09:57
See reply above.
ncarter (slow)
2017/07/28 02:34:25
Acknowledged.
FWIW this article is what was on my
|
| + |
| + size_t pos = 0; |
| + for (size_t i = 0; i < num_matches; ++i) { |
|
Peter Kasting
2017/07/27 05:25:14
Nit: I feel like we ought to be able to write this
ncarter (slow)
2017/07/28 02:34:24
Writing it as you suggest means there's an extra c
|
| + size_t match = |
| + (i == 0) ? first_match |
| + : src.find(find_this.data(), pos, find_this.length()); |
|
danakj
2017/07/27 15:55:30
nit: find_length?
ncarter (slow)
2017/07/28 02:34:25
Done.
|
| + str->append(src, pos, match - pos); |
| + str->append(replace_with.data(), replace_with.length()); |
|
danakj
2017/07/27 15:55:30
nit: replace_length?
ncarter (slow)
2017/07/28 02:34:25
Done.
|
| + pos = match + find_length; |
| } |
| - } while (offset < str_length); |
| - str->resize(write_offset); |
| - return; |
| + |
| + // Handle substring after the final match. |
| + str->append(src, pos, str_length - pos); |
| + return; |
| + } |
| + |
| + // Prepare for the memmove loop below -- expand the string to its final size |
| + // by shifting the data after the first match to the end of the resized |
| + // string. |
| + size_t shift_src = first_match + find_length; |
| + size_t shift_dst = shift_src + expansion; |
| + |
| + // Big |expansion| factors (relative to |str_length|) require padding up to |
| + // |shift_dst|. |
| + if (shift_dst > str_length) |
| + str->resize(shift_dst); |
| + |
| + str->replace(shift_dst, str_length - shift_src, *str, shift_src, |
| + str_length - shift_src); |
|
Peter Kasting
2017/07/27 05:25:14
Nit: We should probably be consistent about whethe
ncarter (slow)
2017/07/28 02:34:25
This particular replace can't be replaced with mem
|
| + str_length = final_length; |
| } |
| - // We're lengthening the string. We can use alternating replacements and |
| - // memmove() calls like above, but we need to precalculate the final string |
| - // length and then expand from back-to-front to avoid overwriting the string |
| - // as we're reading it, needing to shift, or having to copy to a second string |
|
Peter Kasting
2017/07/27 01:09:57
This "needing to shift" is, AFAICT, exactly what y
Peter Kasting
2017/07/27 05:25:14
That said, the only way I can think of to do this
ncarter (slow)
2017/07/28 02:34:25
I'd thought about this too. The best I came up wit
|
| - // temporarily. |
| - size_t first_match = offset; |
| - |
| - // First, calculate the final length and resize the string. |
| - size_t final_length = str_length; |
| - size_t expansion = replace_length - find_length; |
| - size_t current_match; |
| + // We can alternate replacements with memmove. This won't overwrite the source |
| + // region so long as |write_offset| <= |read_offset|; that is guaranteed |
| + // because: |
| + // |
| + // (a) If the string is being shortened, |expansion| is zero and |
| + // |write_offset| grows slower than |read_offset|. |
| + // |
| + // (b) If the string is being lengthened, |write_offset| grows faster than |
| + // |read_offset|, but |expansion| is big enough so that |write_offset| |
| + // will only catch up to |read_offset| at the point of the last match. |
| + size_t write_offset = first_match; |
| + size_t read_offset = first_match + expansion; |
| do { |
|
ncarter (slow)
2017/07/26 23:50:14
Note that this is essentially the old 'string is s
ncarter (slow)
2017/07/28 02:34:25
Done.
|
| - final_length += expansion; |
| - // Minor optimization: save this offset into |current_match|, so that on |
| - // exit from the loop, |current_match| will point at the last instance of |
| - // the find string, and we won't need to find() it again immediately. |
| - current_match = offset; |
| - offset = str->find(find_this.data(), offset + find_length, |
| - find_this.size()); |
| - } while (offset != StringType::npos); |
| - str->resize(final_length); |
| - |
| - // Now do the replacement loop, working backwards through the string. |
| - for (size_t prev_match = str_length, write_offset = final_length; ; |
| - current_match = str->rfind(find_this.data(), current_match - 1, |
| - find_this.size())) { |
| - size_t read_offset = current_match + find_length; |
| - size_t length = prev_match - read_offset; |
| + if (replace_length) { |
| + str->replace(write_offset, replace_length, replace_with.data(), |
| + replace_length); |
| + write_offset += replace_length; |
| + } |
| + read_offset += find_length; |
| + size_t match = |
| + std::min(str->find(find_this.data(), read_offset, find_this.length()), |
| + str_length); |
| + size_t length = match - read_offset; |
| if (length) { |
| - write_offset -= length; |
| memmove(&(*str)[write_offset], &(*str)[read_offset], |
|
ncarter (slow)
2017/07/26 23:50:14
Any reason we preferred memmove over string::repla
Peter Kasting
2017/07/27 01:09:57
I don't recall having one, just "I know I'm moving
ncarter (slow)
2017/07/28 02:34:25
I've switched to StringType::traits_type::move/cop
Peter Kasting
2017/07/28 08:29:24
It's more readable too. +1.
|
| length * sizeof(typename StringType::value_type)); |
| + write_offset += length; |
| + read_offset += length; |
| } |
| - write_offset -= replace_length; |
| - str->replace(write_offset, replace_length, |
| - replace_with.data(), replace_with.size()); |
| - if (current_match == first_match) |
| - return; |
| - prev_match = current_match; |
| - } |
| + } while (read_offset < str_length); |
| + |
| + // If we're shortening the string, truncate it now. |
| + str->resize(write_offset); |
| + |
| + return; |
|
Peter Kasting
2017/07/27 05:25:14
Nit: Trailing return unnecessary
ncarter (slow)
2017/07/28 02:34:24
Done.
|
| } |
| void ReplaceFirstSubstringAfterOffset(string16* str, |