Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(252)

Side by Side Diff: base/strings/string_util.cc

Issue 2979393002: [string_util] fix bug in ReplaceSubstringsAfterOffset() (Closed)
Patch Set: Reduce scope of change to just correctness fix. Created 3 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2013 The Chromium Authors. All rights reserved. 1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "base/strings/string_util.h" 5 #include "base/strings/string_util.h"
6 6
7 #include <ctype.h> 7 #include <ctype.h>
8 #include <errno.h> 8 #include <errno.h>
9 #include <math.h> 9 #include <math.h>
10 #include <stdarg.h> 10 #include <stdarg.h>
(...skipping 694 matching lines...) Expand 10 before | Expand all | Expand 10 after
705 kByteStringsUnlocalized[dimension]); 705 kByteStringsUnlocalized[dimension]);
706 } else { 706 } else {
707 base::snprintf(buf, arraysize(buf), "%.0lf%s", unit_amount, 707 base::snprintf(buf, arraysize(buf), "%.0lf%s", unit_amount,
708 kByteStringsUnlocalized[dimension]); 708 kByteStringsUnlocalized[dimension]);
709 } 709 }
710 710
711 return ASCIIToUTF16(buf); 711 return ASCIIToUTF16(buf);
712 } 712 }
713 713
714 // Runs in O(n) time in the length of |str|. 714 // Runs in O(n) time in the length of |str|.
715 template<class StringType> 715 template <class StringType>
716 void DoReplaceSubstringsAfterOffset(StringType* str, 716 void DoReplaceSubstringsAfterOffset(StringType* str,
717 size_t offset, 717 size_t initial_offset,
718 BasicStringPiece<StringType> find_this, 718 BasicStringPiece<StringType> find_this,
719 BasicStringPiece<StringType> replace_with, 719 BasicStringPiece<StringType> replace_with,
720 bool replace_all) { 720 bool replace_all) {
721 DCHECK(!find_this.empty()); 721 DCHECK(!find_this.empty());
722 722
723 // If the find string doesn't appear, there's nothing to do. 723 // If the find string doesn't appear, there's nothing to do.
724 offset = str->find(find_this.data(), offset, find_this.size()); 724 size_t first_match =
725 if (offset == StringType::npos) 725 str->find(find_this.data(), initial_offset, find_this.length());
726 if (first_match == StringType::npos)
726 return; 727 return;
727 728
728 // If we're only replacing one instance, there's no need to do anything 729 // If we're only replacing one instance, there's no need to do anything
729 // complicated. 730 // complicated.
730 size_t find_length = find_this.length(); 731 const size_t find_length = find_this.length();
732 const size_t replace_length = replace_with.length();
731 if (!replace_all) { 733 if (!replace_all) {
732 str->replace(offset, find_length, replace_with.data(), replace_with.size()); 734 str->replace(first_match, find_length, replace_with.data(), replace_length);
733 return; 735 return;
734 } 736 }
735 737
736 // If the find and replace strings are the same length, we can simply use 738 // If the find and replace strings are the same length, we can simply use
737 // replace() on each instance, and finish the entire operation in O(n) time. 739 // replace() on each instance, and finish the entire operation in O(n) time.
738 size_t replace_length = replace_with.length();
739 if (find_length == replace_length) { 740 if (find_length == replace_length) {
741 size_t offset = first_match;
Peter Kasting 2017/07/27 01:09:57 Nit: It does one unnecessary comparison at the sta
ncarter (slow) 2017/07/28 02:34:25 Done. FWIW it looks like MSVC manages to omit the
740 do { 742 do {
741 str->replace(offset, find_length, 743 str->replace(offset, find_length, replace_with.data(), replace_length);
742 replace_with.data(), replace_with.size());
743 offset = str->find(find_this.data(), offset + replace_length, 744 offset = str->find(find_this.data(), offset + replace_length,
744 find_this.size()); 745 find_this.length());
danakj 2017/07/27 15:55:30 nit: this could be written find_length?
ncarter (slow) 2017/07/28 02:34:25 Done.
745 } while (offset != StringType::npos); 746 } while (offset != StringType::npos);
746 return; 747 return;
747 } 748 }
748 749
749 // Since the find and replace strings aren't the same length, a loop like the 750 // Since the find and replace strings aren't the same length, a loop like the
750 // one above would be O(n^2) in the worst case, as replace() will shift the 751 // one above would be O(n^2) in the worst case, as replace() will shift the
751 // entire remaining string each time. We need to be more clever to keep 752 // entire remaining string each time. We need to be more clever to keep things
752 // things O(n). 753 // O(n).
Peter Kasting 2017/07/27 05:25:14 Nit: Somewhere, we need comments that say what the
ncarter (slow) 2017/07/28 02:34:25 Done.
753 //
754 // If we're shortening the string, we can alternate replacements with shifting
755 // forward the intervening characters using memmove().
756 size_t str_length = str->length(); 754 size_t str_length = str->length();
757 if (find_length > replace_length) { 755 size_t expansion = 0;
758 size_t write_offset = offset; 756 if (replace_length > find_length) {
759 do { 757 // This operation lengthens the string; determine the new length by counting
760 if (replace_length) { 758 // matches.
761 str->replace(write_offset, replace_length, 759 size_t num_matches = 0;
762 replace_with.data(), replace_with.size()); 760 for (size_t match = first_match; match != StringType::npos;
danakj 2017/07/27 15:55:30 we should be consistent and either for() always li
ncarter (slow) 2017/07/28 02:34:24 Switched to a for loop above, based on the convers
763 write_offset += replace_length; 761 match = str->find(find_this.data(), match + find_length,
762 find_this.length())) {
danakj 2017/07/27 15:55:30 nit: find_length?
ncarter (slow) 2017/07/28 02:34:25 Done.
763 num_matches++;
Peter Kasting 2017/07/27 05:25:14 Nit: Totally personal style, but postincrements re
ncarter (slow) 2017/07/28 02:34:24 Done.
764 }
765 expansion = (replace_length - find_length) * num_matches;
danakj 2017/07/27 15:55:30 this is nitty, but as per peter's comment below, i
ncarter (slow) 2017/07/28 02:34:24 I've eliminated the imul instruction, by accumulat
766 const size_t final_length = str_length + expansion;
767
768 if (str->capacity() < final_length) {
ncarter (slow) 2017/07/26 23:50:14 Using a temporary seems to result in fewer copies
Peter Kasting 2017/07/27 01:09:57 I tried to optimize the original code to do as few
danakj 2017/07/27 15:55:30 I'd like to understand that we really want to use
danakj 2017/07/27 15:58:48 Oops, I meant memmove() here as it's written expli
Peter Kasting 2017/07/27 22:09:47 I don't think memmove() usually copies to a temp b
Peter Kasting 2017/07/27 22:11:21 More confirmation of this: see the answer (and com
ncarter (slow) 2017/07/28 02:34:25 I agree with danakj's intuition that a temp seems
Peter Kasting 2017/07/28 08:29:24 By 1, right? Because we replace "copy to back" wi
danakj 2017/07/28 15:53:48 In my profiling work in chromium, code has been co
ncarter (slow) 2017/07/28 21:46:34 No, by a difference of 2. In the worse case the wh
Peter Kasting 2017/07/29 02:18:01 To summarize your explanation: if we have to grow
ncarter (slow) 2017/07/31 18:55:39 Agree. FWIW, I figured out the algorithm for the
769 // Since we'd have to realloc the string anyway, use a temporary to build
770 // the result.
771 StringType src;
772 str->swap(src);
773 str->reserve(final_length);
ncarter (slow) 2017/07/26 23:50:14 Should we worry that using a temporary here will b
Peter Kasting 2017/07/27 01:09:57 See reply above.
ncarter (slow) 2017/07/28 02:34:25 Acknowledged. FWIW this article is what was on my
774
775 size_t pos = 0;
776 for (size_t i = 0; i < num_matches; ++i) {
Peter Kasting 2017/07/27 05:25:14 Nit: I feel like we ought to be able to write this
ncarter (slow) 2017/07/28 02:34:24 Writing it as you suggest means there's an extra c
777 size_t match =
778 (i == 0) ? first_match
779 : src.find(find_this.data(), pos, find_this.length());
danakj 2017/07/27 15:55:30 nit: find_length?
ncarter (slow) 2017/07/28 02:34:25 Done.
780 str->append(src, pos, match - pos);
781 str->append(replace_with.data(), replace_with.length());
danakj 2017/07/27 15:55:30 nit: replace_length?
ncarter (slow) 2017/07/28 02:34:25 Done.
782 pos = match + find_length;
764 } 783 }
765 size_t read_offset = offset + find_length; 784
766 offset = std::min( 785 // Handle substring after the final match.
767 str->find(find_this.data(), read_offset, find_this.size()), 786 str->append(src, pos, str_length - pos);
768 str_length); 787 return;
769 size_t length = offset - read_offset; 788 }
770 if (length) { 789
771 memmove(&(*str)[write_offset], &(*str)[read_offset], 790 // Prepare for the memmove loop below -- expand the string to its final size
772 length * sizeof(typename StringType::value_type)); 791 // by shifting the data after the first match to the end of the resized
773 write_offset += length; 792 // string.
774 } 793 size_t shift_src = first_match + find_length;
775 } while (offset < str_length); 794 size_t shift_dst = shift_src + expansion;
776 str->resize(write_offset); 795
777 return; 796 // Big |expansion| factors (relative to |str_length|) require padding up to
797 // |shift_dst|.
798 if (shift_dst > str_length)
799 str->resize(shift_dst);
800
801 str->replace(shift_dst, str_length - shift_src, *str, shift_src,
802 str_length - shift_src);
Peter Kasting 2017/07/27 05:25:14 Nit: We should probably be consistent about whethe
ncarter (slow) 2017/07/28 02:34:25 This particular replace can't be replaced with mem
803 str_length = final_length;
778 } 804 }
779 805
780 // We're lengthening the string. We can use alternating replacements and 806 // We can alternate replacements with memmove. This won't overwrite the source
781 // memmove() calls like above, but we need to precalculate the final string 807 // region so long as |write_offset| <= |read_offset|; that is guaranteed
782 // length and then expand from back-to-front to avoid overwriting the string 808 // because:
783 // as we're reading it, needing to shift, or having to copy to a second string 809 //
Peter Kasting 2017/07/27 01:09:57 This "needing to shift" is, AFAICT, exactly what y
Peter Kasting 2017/07/27 05:25:14 That said, the only way I can think of to do this
ncarter (slow) 2017/07/28 02:34:25 I'd thought about this too. The best I came up wit
784 // temporarily. 810 // (a) If the string is being shortened, |expansion| is zero and
785 size_t first_match = offset; 811 // |write_offset| grows slower than |read_offset|.
786 812 //
787 // First, calculate the final length and resize the string. 813 // (b) If the string is being lengthened, |write_offset| grows faster than
788 size_t final_length = str_length; 814 // |read_offset|, but |expansion| is big enough so that |write_offset|
789 size_t expansion = replace_length - find_length; 815 // will only catch up to |read_offset| at the point of the last match.
790 size_t current_match; 816 size_t write_offset = first_match;
817 size_t read_offset = first_match + expansion;
791 do { 818 do {
ncarter (slow) 2017/07/26 23:50:14 Note that this is essentially the old 'string is s
ncarter (slow) 2017/07/28 02:34:25 Done.
792 final_length += expansion; 819 if (replace_length) {
793 // Minor optimization: save this offset into |current_match|, so that on 820 str->replace(write_offset, replace_length, replace_with.data(),
794 // exit from the loop, |current_match| will point at the last instance of 821 replace_length);
795 // the find string, and we won't need to find() it again immediately. 822 write_offset += replace_length;
796 current_match = offset; 823 }
797 offset = str->find(find_this.data(), offset + find_length, 824 read_offset += find_length;
798 find_this.size()); 825 size_t match =
799 } while (offset != StringType::npos); 826 std::min(str->find(find_this.data(), read_offset, find_this.length()),
800 str->resize(final_length); 827 str_length);
801 828 size_t length = match - read_offset;
802 // Now do the replacement loop, working backwards through the string.
803 for (size_t prev_match = str_length, write_offset = final_length; ;
804 current_match = str->rfind(find_this.data(), current_match - 1,
805 find_this.size())) {
806 size_t read_offset = current_match + find_length;
807 size_t length = prev_match - read_offset;
808 if (length) { 829 if (length) {
809 write_offset -= length;
810 memmove(&(*str)[write_offset], &(*str)[read_offset], 830 memmove(&(*str)[write_offset], &(*str)[read_offset],
ncarter (slow) 2017/07/26 23:50:14 Any reason we preferred memmove over string::repla
Peter Kasting 2017/07/27 01:09:57 I don't recall having one, just "I know I'm moving
ncarter (slow) 2017/07/28 02:34:25 I've switched to StringType::traits_type::move/cop
Peter Kasting 2017/07/28 08:29:24 It's more readable too. +1.
811 length * sizeof(typename StringType::value_type)); 831 length * sizeof(typename StringType::value_type));
832 write_offset += length;
833 read_offset += length;
812 } 834 }
813 write_offset -= replace_length; 835 } while (read_offset < str_length);
814 str->replace(write_offset, replace_length, 836
815 replace_with.data(), replace_with.size()); 837 // If we're shortening the string, truncate it now.
816 if (current_match == first_match) 838 str->resize(write_offset);
817 return; 839
818 prev_match = current_match; 840 return;
Peter Kasting 2017/07/27 05:25:14 Nit: Trailing return unnecessary
ncarter (slow) 2017/07/28 02:34:24 Done.
819 }
820 } 841 }
821 842
822 void ReplaceFirstSubstringAfterOffset(string16* str, 843 void ReplaceFirstSubstringAfterOffset(string16* str,
823 size_t start_offset, 844 size_t start_offset,
824 StringPiece16 find_this, 845 StringPiece16 find_this,
825 StringPiece16 replace_with) { 846 StringPiece16 replace_with) {
826 DoReplaceSubstringsAfterOffset<string16>( 847 DoReplaceSubstringsAfterOffset<string16>(
827 str, start_offset, find_this, replace_with, false); // Replace first. 848 str, start_offset, find_this, replace_with, false); // Replace first.
828 } 849 }
829 850
(...skipping 212 matching lines...) Expand 10 before | Expand all | Expand 10 after
1042 } // namespace 1063 } // namespace
1043 1064
1044 size_t strlcpy(char* dst, const char* src, size_t dst_size) { 1065 size_t strlcpy(char* dst, const char* src, size_t dst_size) {
1045 return lcpyT<char>(dst, src, dst_size); 1066 return lcpyT<char>(dst, src, dst_size);
1046 } 1067 }
1047 size_t wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) { 1068 size_t wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) {
1048 return lcpyT<wchar_t>(dst, src, dst_size); 1069 return lcpyT<wchar_t>(dst, src, dst_size);
1049 } 1070 }
1050 1071
1051 } // namespace base 1072 } // namespace base
OLDNEW
« no previous file with comments | « no previous file | base/strings/string_util_unittest.cc » ('j') | base/strings/string_util_unittest.cc » ('J')

Powered by Google App Engine
This is Rietveld 408576698