base/strings/string_util.cc - Issue 2979393002: [string_util] fix bug in ReplaceSubstringsAfterOffset()

Unified Diff: base/strings/string_util.cc

Issue 2979393002: [string_util] fix bug in ReplaceSubstringsAfterOffset() (Closed)

Patch Set: Reduce scope of change to just correctness fix. Created 3 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: base/strings/string_util.cc

diff --git a/base/strings/string_util.cc b/base/strings/string_util.cc

index 71ae894dd6e7559e1efa0a2623e6328353ce61d4..b44026fc051e04f2db6c92971759e43d2adc9ff0 100644

--- a/base/strings/string_util.cc

+++ b/base/strings/string_util.cc

@@ -712,111 +712,132 @@ string16 FormatBytesUnlocalized(int64_t bytes) {

}

// Runs in O(n) time in the length of |str|.

-template<class StringType>

+template <class StringType>

void DoReplaceSubstringsAfterOffset(StringType* str,

- size_t offset,

+ size_t initial_offset,

BasicStringPiece<StringType> find_this,

BasicStringPiece<StringType> replace_with,

bool replace_all) {

DCHECK(!find_this.empty());

// If the find string doesn't appear, there's nothing to do.

- offset = str->find(find_this.data(), offset, find_this.size());

- if (offset == StringType::npos)

+ size_t first_match =

+ str->find(find_this.data(), initial_offset, find_this.length());

+ if (first_match == StringType::npos)

return;

// If we're only replacing one instance, there's no need to do anything

// complicated.

- size_t find_length = find_this.length();

+ const size_t find_length = find_this.length();

+ const size_t replace_length = replace_with.length();

if (!replace_all) {

- str->replace(offset, find_length, replace_with.data(), replace_with.size());

+ str->replace(first_match, find_length, replace_with.data(), replace_length);

return;

}

// If the find and replace strings are the same length, we can simply use

// replace() on each instance, and finish the entire operation in O(n) time.

- size_t replace_length = replace_with.length();

if (find_length == replace_length) {

+ size_t offset = first_match;

Peter Kasting 2017/07/27 01:09:57 Nit: It does one unnecessary comparison at the sta

ncarter (slow) 2017/07/28 02:34:25 Done. FWIW it looks like MSVC manages to omit the

do {

- str->replace(offset, find_length,

- replace_with.data(), replace_with.size());

+ str->replace(offset, find_length, replace_with.data(), replace_length);

offset = str->find(find_this.data(), offset + replace_length,

- find_this.size());

+ find_this.length());

danakj 2017/07/27 15:55:30 nit: this could be written find_length?

ncarter (slow) 2017/07/28 02:34:25 Done.

} while (offset != StringType::npos);

return;

}

// Since the find and replace strings aren't the same length, a loop like the

// one above would be O(n^2) in the worst case, as replace() will shift the

- // entire remaining string each time. We need to be more clever to keep

- // things O(n).

- //

- // If we're shortening the string, we can alternate replacements with shifting

- // forward the intervening characters using memmove().

+ // entire remaining string each time. We need to be more clever to keep things

+ // O(n).

Peter Kasting 2017/07/27 05:25:14 Nit: Somewhere, we need comments that say what the

ncarter (slow) 2017/07/28 02:34:25 Done.

size_t str_length = str->length();

- if (find_length > replace_length) {

- size_t write_offset = offset;

- do {

- if (replace_length) {

- str->replace(write_offset, replace_length,

- replace_with.data(), replace_with.size());

- write_offset += replace_length;

- }

- size_t read_offset = offset + find_length;

- offset = std::min(

- str->find(find_this.data(), read_offset, find_this.size()),

- str_length);

- size_t length = offset - read_offset;

- if (length) {

- memmove(&(*str)[write_offset], &(*str)[read_offset],

- length * sizeof(typename StringType::value_type));

- write_offset += length;

+ size_t expansion = 0;

+ if (replace_length > find_length) {

+ // This operation lengthens the string; determine the new length by counting

+ // matches.

+ size_t num_matches = 0;

+ for (size_t match = first_match; match != StringType::npos;

danakj 2017/07/27 15:55:30 we should be consistent and either for() always li

ncarter (slow) 2017/07/28 02:34:24 Switched to a for loop above, based on the convers

+ match = str->find(find_this.data(), match + find_length,

+ find_this.length())) {

danakj 2017/07/27 15:55:30 nit: find_length?

ncarter (slow) 2017/07/28 02:34:25 Done.

+ num_matches++;

Peter Kasting 2017/07/27 05:25:14 Nit: Totally personal style, but postincrements re

ncarter (slow) 2017/07/28 02:34:24 Done.

+ }

+ expansion = (replace_length - find_length) * num_matches;

danakj 2017/07/27 15:55:30 this is nitty, but as per peter's comment below, i

ncarter (slow) 2017/07/28 02:34:24 I've eliminated the imul instruction, by accumulat

+ const size_t final_length = str_length + expansion;

+ if (str->capacity() < final_length) {

ncarter (slow) 2017/07/26 23:50:14 Using a temporary seems to result in fewer copies

Peter Kasting 2017/07/27 01:09:57 I tried to optimize the original code to do as few

danakj 2017/07/27 15:55:30 I'd like to understand that we really want to use

I'd like to understand that we really want to use the memmove() one. Is the claim that StrType::replace() is doing so with memmove()? In that case almost always the src and dst are going to overlap for the shifting replace(), right? Then the replace() involves a copy (and surely a malloc). By that time we've done about as much work as this more concise path (maybe the number of bytes copied is fewer).

Then on each replace() to move characters up, in the worst case, many of them overlap, e.g. replace "aa" with "bbb": cccccccaacccccccaacccccccaacccccccaa ->cccccccaa____cccccccaacccccccaacccccccaa ->cccccccbbb___cccccccaacccccccaacccccccaa ->cccccccbbbcccccccaa___cccccccaacccccccaa <- memmove overlaps = copy to temp buffer (+ malloc?) ->cccccccbbbcccccccbbb__cccccccaacccccccaa ->cccccccbbbcccccccbbbcccccccaa__cccccccaa <- memmove overlaps = copy to temp buffer (+ malloc?) ->cccccccbbbcccccccbbbcccccccbbb_cccccccaa ->cccccccbbbcccccccbbbcccccccbbbcccccccaa_ <- memmove overlaps = copy to temp buffer (+ malloc?) ->cccccccbbbcccccccbbbcccccccbbbcccccccbbb So this would be at least 3 copies into temp buffers, maybe 3 mallocs. Maybe more if StrType::replace() is implemented with memmove(). Using the swap/reserve/copy seems better to me. Maybe we should microbenchmark/measure what's faster?

danakj 2017/07/27 15:58:48 Oops, I meant memmove() here as it's written expli

On 2017/07/27 15:55:30, danakj wrote: > On 2017/07/27 01:09:57, Peter Kasting wrote: > > On 2017/07/26 23:50:14, ncarter (slow) wrote: > > > Using a temporary seems to result in fewer copies of the string overall. > From > > > what I can tell, using a temporary can be more efficient than > str->replace(_, > > _, > > > str, _, _) in MSVC's/dinkumware's STL, since when replace() needs to grow > the > > > capacity, it does a memcpy from the old to the new buffer, and then a > memmove > > > from the new buffer to itself. > > > > > > So we could actually consider always using a temp and dropping the memmove > > > codepath, if we don't worry about the memory churn, or the effects of losing > > the > > > information embedded in str's capacity. > > > > > > Alternatively, if we really want to preserve the exponental growth of str's > > > capacity, we could always do the memmove loop, even when the resize/replace > > loop > > > would need to realloc. > > > > > > In other words: there's two implementations of lengthening here, they should > > > both be correct in all cases, and I'd be happy to drop one. > > > > I tried to optimize the original code to do as few memory accesses as > possible. > > However, I did not look at (nor do I really want to look at) factors like "how > > are these STL implementations built under the hood"; that can change at any > > time. The limit of how far I'll go with that is rough guesses like "memmove() > > is exactly what I want, string::replace() has to handle other cases too" (see > > reply lower down). > > > > Preserving exponential capacity growth is a non-goal, IIUC; that's basically > an > > issue when people are repeatedly calling DoReplace... with longer strings, > > right? (Since the code right here is not in a loop.) > > > > I am not a big fan of churning memory when we don't need to. > > I'd like to understand that we really want to use the memmove() one. Is the > claim that StrType::replace() is doing so with memmove()? 
In that case almost > always the src and dst are going to overlap for the shifting replace(), right? > Then the replace() involves a copy (and surely a malloc). By that time we've > done about as much work as this more concise path (maybe the number of bytes > copied is fewer). > > > The memory areas may overlap: copying takes place as > > though the bytes in src are first copied into a temporary array that > > does not overlap src or dest, and the bytes are then copied from the > > temporary array to dest. > [http://man7.org/linux/man-pages/man3/memmove.3.html] > > Then one each replace() to move characters up, in the worst case, many of them > overlap:

Oops, I meant memmove() here as it's written explicitly as such for this case. If I misunderstood the memmove algo, please also correct me.

Peter Kasting 2017/07/27 22:09:47 I don't think memmove() usually copies to a temp b

Peter Kasting 2017/07/27 22:11:21 More confirmation of this: see the answer (and com

ncarter (slow) 2017/07/28 02:34:25 I agree with danakj's intuition that a temp seems

On 2017/07/27 15:58:48, danakj wrote: > On 2017/07/27 15:55:30, danakj wrote: > > On 2017/07/27 01:09:57, Peter Kasting wrote: > > > On 2017/07/26 23:50:14, ncarter (slow) wrote: > > > > Using a temporary seems to result in fewer copies of the string overall. > > From > > > > what I can tell, using a temporary can be more efficient than > > str->replace(_, > > > _, > > > > str, _, _) in MSVC's/dinkumware's STL, since when replace() needs to grow > > the > > > > capacity, it does a memcpy from the old to the new buffer, and then a > > memmove > > > > from the new buffer to itself. > > > > > > > > So we could actually consider always using a temp and dropping the memmove > > > > codepath, if we don't worry about the memory churn, or the effects of > losing > > > the > > > > information embedded in str's capacity. > > > > > > > > Alternatively, if we really want to preserve the exponental growth of > str's > > > > capacity, we could always do the memmove loop, even when the > resize/replace > > > loop > > > > would need to realloc. > > > > > > > > In other words: there's two implementations of lengthening here, they > should > > > > both be correct in all cases, and I'd be happy to drop one. > > > > > > I tried to optimize the original code to do as few memory accesses as > > possible. > > > However, I did not look at (nor do I really want to look at) factors like > "how > > > are these STL implementations built under the hood"; that can change at any > > > time. The limit of how far I'll go with that is rough guesses like > "memmove() > > > is exactly what I want, string::replace() has to handle other cases too" > (see > > > reply lower down). > > > > > > Preserving exponential capacity growth is a non-goal, IIUC; that's basically > > an > > > issue when people are repeatedly calling DoReplace... with longer strings, > > > right? (Since the code right here is not in a loop.) > > > > > > I am not a big fan of churning memory when we don't need to. 
> > > > I'd like to understand that we really want to use the memmove() one. Is the > > claim that StrType::replace() is doing so with memmove()? In that case almost > > always the src and dst are going to overlap for the shifting replace(), right? > > Then the replace() involves a copy (and surely a malloc). By that time we've > > done about as much work as this more concise path (maybe the number of bytes > > copied is fewer). > > > > > The memory areas may overlap: copying takes place as > > > though the bytes in src are first copied into a temporary array that > > > does not overlap src or dest, and the bytes are then copied from the > > > temporary array to dest. > > [http://man7.org/linux/man-pages/man3/memmove.3.html] > > > > Then one each replace() to move characters up, in the worst case, many of them > > overlap: > > Oops, I meant memmove() here as it's written explicitly as such for this case. > If I misunderstood the memove algo pls also correct me. > > > > > eg. replace ""aa" with "bbb" > > > > cccccccaacccccccaacccccccaacccccccaa > > ->cccccccaa____cccccccaacccccccaacccccccaa > > ->cccccccbbb___cccccccaacccccccaacccccccaa > > ->cccccccbbbcccccccaa___cccccccaacccccccaa <- memmove overlaps = copy to temp > > buffer (+ malloc?) > > ->cccccccbbbcccccccbbb__cccccccaacccccccaa > > ->cccccccbbbcccccccbbbcccccccaa__cccccccaa <- memmove overlaps = copy to temp > > buffer (+ malloc?) > > ->cccccccbbbcccccccbbbcccccccbbb_cccccccaa > > ->cccccccbbbcccccccbbbcccccccbbbcccccccaa_ <- memmove overlaps = copy to temp > > buffer (+ malloc?) > > ->cccccccbbbcccccccbbbcccccccbbbcccccccbbb > > > > So this would be at least 3 copies into temp buffers, maybe 3 mallocs. Maybe > > more if StrType::replace() is implemented with memmove(). > > > > Using the swap/reserve/copy seems better to me. Maybe we should > > microbenchmark/measure what's faster? >

I agree with danakj's intuition that a temp seems faster overall, but it looks like we count a different number of the malloc's in the memmove path. From what I saw: the VC implementation of string::replace starts with a check to see if the substring region overlaps with |str|, and if so, uses a more careful path, where copies are done using the 'move' function from the string traits (i.e. memmove, which is correct in the case of overlapping src/dst) rather than the 'copy' function (i.e. memcpy). I didn't see any case where replace() creates a temporary copy of the src, or where replace does a malloc, except to grow the capacity. Also, my understanding is that memmove does not ever call malloc (even though the man page says something about "copying takes place AS THOUGH the bytes in src are first copied into a temporary array"). In the in-place expansion path, I only expect malloc to happen in two places: the resize() on line 799, and the subsequent replace() on line 801. But because of the capacity check on line 768, it was my expectation that neither of these would actually result in mallocs as written. If we were using the memmove path unconditionally, we'd want to add a reserve(final_length) operation before the resize(), and that would then be the only place where a malloc might happen. Regarding tradeoffs -- here's what I see: If existing capacity is sufficient, then avoiding a temporary has the following advantages: - If there is only one match, it's optimal. - If there are multiple matches, but they're at the end of a long string, we don't incur a copy for the region before the first match at all. - We don't incur the cost of a malloc, and don't churn memory. - We preserve the excess capacity; VC's string grows the buffer exponentially (looks like a factor of 1.5, maybe). 
This might reduce the total number of mallocs in usages like this: https://cs.chromium.org/chromium/src/net/base/filename_util.cc?type=cs&q=Repl..., where ReplaceSubstrings is called several times in a row. Using a temporary has tons of perf advantages, though: - Fewer copy operations in the worst case. - Since the copies are to non-overlapping regions, they'll use a memcpy() instead of memmove(), which should be faster (though it's complicated: memmove might get a speed boost from the fact that a copy operation on an overlapping region touches fewer cache lines overall -- i.e. an in-place move consumes less memory bandwidth, and one would hope that large copies are dominated by memory bandwidth, not computation). Basically, it comes down to "are the costs of the malloc for the temporary high enough, that we'd be willing to incur an extra full-length memmove to avoid it?".

Peter Kasting 2017/07/28 08:29:24 By 1, right? Because we replace "copy to back" wi

danakj 2017/07/28 15:53:48 In my profiling work in chromium, code has been co

In my profiling work in chromium, code has been consistently O(# of mallocs). They dominate pretty much everything else. If memmove doesn't cause a malloc then that should perform better. Measuring is the only way to know for sure, though, of course.

ncarter (slow) 2017/07/28 21:46:34 No, by a difference of 2. In the worse case the wh

No, by a difference of 2. In the worse case the whole string would get memcpy'ed or memmoved nearly 3 times for the "grow in place" path, versus exactly 1 time for the in-place append path. (there is also one extra find() in the in-place path, since the loop doesn't know to stop after the last match), Additionally, there may be some extra memset's if we hit the resize call on line 799 (these are Patch Set 8 line numbers). Imagine the following case. str = "xyxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyxy" str.capacity() == str.length() == 41 find_this = "x" replace_with = "xx" Let's say we decide trying to use the in-place copy here. We get to the line 801 replace() operation, which is: str->replace(4, 37, str, 1, 40); Internally, in the MSVC impl, this will do a reserve(42) operation: if (this->_Mysize() < _Newsize) _Grow(_Newsize); Which then hits this case inside of Grow (Myres is the capacity): if (this->_Myres() < _Newsize) _Copy(_Newsize, this->_Mysize()); // reallocate to grow Which always does a fresh allocation: pointer _Ptr; _TRY_BEGIN _Ptr = this->_Getal().allocate(_Newres + 1); And then memcpy's from the old ptr to the new ptr: if (0 < _Oldlen) _Traits::copy(_Unfancy(_Ptr), this->_Myptr(), _Oldlen); // copy existing elements Then we're back in replace(), which will hit a path like this: else if (_Roff <= _Off) { // hole gets larger, substring begins before hole _Traits::move(this->_Myptr() + _Off + _Count, this->_Myptr() + _Off + _N0, _Nm); // move tail down _Traits::move(this->_Myptr() + _Off, this->_Myptr() + _Roff, _Count); // fill hole } _Nm here is zero, so really the memmove here is the second one, which is something like: _Traits::move(this->_Myptr() + 4, this->_Myptr() + 1, 37); After that operation the memory is like this: xyxyxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyxy And each byte of the new memory has been written this many times: 1112222222222222222222222222222222222222211 So this replace() operation is itself 2 whole-string memcpys (really because 
the MSVC STL is missing a potential optimization here, but it is what it is). Then we have the third almost-whole-string memcpy in our code, to shift the middle bank of y's down. So three writes to almost every memory location: 2223333333333333333333333333333333333333321 The old rfind code, worst-case, was 1 whole-string memcpy (for the reserve() call implicit in resize()), then a memzero of |expansion| chars (initializing the extended tail in the resize() call), then the mmemmove pass in our code. So it looked like this: reserve (memcpy): 1111111111111111111111111111111111111111000 resize (memzero): + 0000000000000000000000000000000000000000111 our loop (memmove/memcpy): + 1111111111111111111111111111111111111111111 --------------------------------------------- = 2222222222222222222222222222222222222222222 (total cost) Whereas the append approach looks like this, all of the time: 1111111111111111111111111111111111111111111 ======================== My apples-to-apples breakdown accounting (which ignores find() cost, which is the same for all approaches) is something like: - append to new memory: requires one malloc and |str_length| char copies. Is correct. - rfind approach, sufficient capacity: requires |str_length - first_match| char copies (plus |new_length - str_length| memzeros). Is incorrect. - rfind approach, insufficient capacity: requires |2*str_length - first_match| char copies (plus |new_length - str_length| memzeros) and one malloc. Is incorrect. - shift-down approach, sufficient capacity: requires |str_length + last_match - 2*first_match| char copies. Is correct. - shift-down approach, insufficient capacity: requires |2*str_length + last_match - 2*first_match| char copies and one malloc. Is correct. 
======================== Given the above table, the choice is obvious except for the sufficient-capacity case, where we're picking between: (A) append (one malloc and |str_length| char copies), vs (B) shift-down (|str_length + last_match - 2*first_match| char copies, which is 2*str_length in the worst case -- as described below). The best case for shift-down is late-string matches, where the cost is basically zero (last_match == first_match == str_length). The worst case is when (last_match == str_length && first_match == 0), and the cost is 2*str_length. Also -- I think this bears repeating -- our shift-down is optimal (copy-wise) for the single-match case. All in all, I think this really points us in the direction of including the shift-down strategy in the sufficient-capacity case. (2*str_length) copies in the worst case, with 1*str_length copies in the average case, and an optimally low number of copies in an important common case -- this seems likely to be better than a guaranteed cost of (1 malloc + 1*str_length copies) in all cases. If we want to support an additional block of copy logic, we can add preprocessing to determine if rfind is safe. rfind is always safe for single-char |find_this|, and safety is trivial (a[0] == a[len - 1]) to determine for 2-char and 3-char |find_this|. We don't need to do an rfind determination if num_matches == 1, or if capacity is insufficient. Maybe as a follow-on CL?

Peter Kasting 2017/07/29 02:18:01 To summarize your explanation: if we have to grow

ncarter (slow) 2017/07/31 18:55:39 Agree. FWIW, I figured out the algorithm for the

Agree. FWIW, I figured out the algorithm for the "determine if a string has self-overlap that would make rfind potentially dangerous" and it's actually really elegant: seems to require exactly |find_length/2| char comparisons in the worst case. But it's also really subtle, and I'm not convinced it's worth the complexity/documentation cost -- it's a slippery slope from there to full-on Boyer-Moore. Even so: a recommended fun exercise / interview question.

+ // Since we'd have to realloc the string anyway, use a temporary to build

+ // the result.

+ StringType src;

+ str->swap(src);

+ str->reserve(final_length);

ncarter (slow) 2017/07/26 23:50:14 Should we worry that using a temporary here will b

Peter Kasting 2017/07/27 01:09:57 See reply above.

ncarter (slow) 2017/07/28 02:34:25 Acknowledged. FWIW this article is what was on my

+ size_t pos = 0;

+ for (size_t i = 0; i < num_matches; ++i) {

Peter Kasting 2017/07/27 05:25:14 Nit: I feel like we ought to be able to write this

ncarter (slow) 2017/07/28 02:34:24 Writing it as you suggest means there's an extra c

+ size_t match =

+ (i == 0) ? first_match

+ : src.find(find_this.data(), pos, find_this.length());

danakj 2017/07/27 15:55:30 nit: find_length?

ncarter (slow) 2017/07/28 02:34:25 Done.

+ str->append(src, pos, match - pos);

+ str->append(replace_with.data(), replace_with.length());

danakj 2017/07/27 15:55:30 nit: replace_length?

ncarter (slow) 2017/07/28 02:34:25 Done.

+ pos = match + find_length;

}

- } while (offset < str_length);

- str->resize(write_offset);

- return;

+ // Handle substring after the final match.

+ str->append(src, pos, str_length - pos);

+ return;

+ }

+ // Prepare for the memmove loop below -- expand the string to its final size

+ // by shifting the data after the first match to the end of the resized

+ // string.

+ size_t shift_src = first_match + find_length;

+ size_t shift_dst = shift_src + expansion;

+ // Big |expansion| factors (relative to |str_length|) require padding up to

+ // |shift_dst|.

+ if (shift_dst > str_length)

+ str->resize(shift_dst);

+ str->replace(shift_dst, str_length - shift_src, *str, shift_src,

+ str_length - shift_src);

Peter Kasting 2017/07/27 05:25:14 Nit: We should probably be consistent about whethe

ncarter (slow) 2017/07/28 02:34:25 This particular replace can't be replaced with mem

+ str_length = final_length;

}

- // We're lengthening the string. We can use alternating replacements and

- // memmove() calls like above, but we need to precalculate the final string

- // length and then expand from back-to-front to avoid overwriting the string

- // as we're reading it, needing to shift, or having to copy to a second string

Peter Kasting 2017/07/27 01:09:57 This "needing to shift" is, AFAICT, exactly what y

Peter Kasting 2017/07/27 05:25:14 That said, the only way I can think of to do this

ncarter (slow) 2017/07/28 02:34:25 I'd thought about this too. The best I came up wit

- // temporarily.

- size_t first_match = offset;

- // First, calculate the final length and resize the string.

- size_t final_length = str_length;

- size_t expansion = replace_length - find_length;

- size_t current_match;

+ // We can alternate replacements with memmove. This won't overwrite the source

+ // region so long as |write_offset| <= |read_offset|; that is guaranteed

+ // because:

+ //

+ // (a) If the string is being shortened, |expansion| is zero and

+ // |write_offset| grows slower than |read_offset|.

+ //

+ // (b) If the string is being lengthened, |write_offset| grows faster than

+ // |read_offset|, but |expansion| is big enough so that |write_offset|

+ // will only catch up to |read_offset| at the point of the last match.

+ size_t write_offset = first_match;

+ size_t read_offset = first_match + expansion;

do {

ncarter (slow) 2017/07/26 23:50:14 Note that this is essentially the old 'string is s

ncarter (slow) 2017/07/28 02:34:25 Done.

- final_length += expansion;

- // Minor optimization: save this offset into |current_match|, so that on

- // exit from the loop, |current_match| will point at the last instance of

- // the find string, and we won't need to find() it again immediately.

- current_match = offset;

- offset = str->find(find_this.data(), offset + find_length,

- find_this.size());

- } while (offset != StringType::npos);

- str->resize(final_length);

- // Now do the replacement loop, working backwards through the string.

- for (size_t prev_match = str_length, write_offset = final_length; ;

- current_match = str->rfind(find_this.data(), current_match - 1,

- find_this.size())) {

- size_t read_offset = current_match + find_length;

- size_t length = prev_match - read_offset;

+ if (replace_length) {

+ str->replace(write_offset, replace_length, replace_with.data(),

+ replace_length);

+ write_offset += replace_length;

+ }

+ read_offset += find_length;

+ size_t match =

+ std::min(str->find(find_this.data(), read_offset, find_this.length()),

+ str_length);

+ size_t length = match - read_offset;

if (length) {

- write_offset -= length;

memmove(&(*str)[write_offset], &(*str)[read_offset],

ncarter (slow) 2017/07/26 23:50:14 Any reason we preferred memmove over string::repla

Peter Kasting 2017/07/27 01:09:57 I don't recall having one, just "I know I'm moving

ncarter (slow) 2017/07/28 02:34:25 I've switched to StringType::traits_type::move/cop

Peter Kasting 2017/07/28 08:29:24 It's more readable too. +1.

length * sizeof(typename StringType::value_type));

+ write_offset += length;

+ read_offset += length;

}

- write_offset -= replace_length;

- str->replace(write_offset, replace_length,

- replace_with.data(), replace_with.size());

- if (current_match == first_match)

- return;

- prev_match = current_match;

- }

+ } while (read_offset < str_length);

+ // If we're shortening the string, truncate it now.

+ str->resize(write_offset);

+ return;

Peter Kasting 2017/07/27 05:25:14 Nit: Trailing return unnecessary

ncarter (slow) 2017/07/28 02:34:24 Done.

}

void ReplaceFirstSubstringAfterOffset(string16* str,

« no previous file with comments | « no previous file | base/strings/string_util_unittest.cc » ('j') | base/strings/string_util_unittest.cc » ('J')