Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(568)

Side by Side Diff: base/strings/string_util.cc

Issue 2979393002: [string_util] fix bug in ReplaceSubstringsAfterOffset() (Closed)
Patch Set: Unittest fixes. Created 3 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2013 The Chromium Authors. All rights reserved. 1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "base/strings/string_util.h" 5 #include "base/strings/string_util.h"
6 6
7 #include <ctype.h> 7 #include <ctype.h>
8 #include <errno.h> 8 #include <errno.h>
9 #include <math.h> 9 #include <math.h>
10 #include <stdarg.h> 10 #include <stdarg.h>
(...skipping 694 matching lines...) Expand 10 before | Expand all | Expand 10 after
705 kByteStringsUnlocalized[dimension]); 705 kByteStringsUnlocalized[dimension]);
706 } else { 706 } else {
707 base::snprintf(buf, arraysize(buf), "%.0lf%s", unit_amount, 707 base::snprintf(buf, arraysize(buf), "%.0lf%s", unit_amount,
708 kByteStringsUnlocalized[dimension]); 708 kByteStringsUnlocalized[dimension]);
709 } 709 }
710 710
711 return ASCIIToUTF16(buf); 711 return ASCIIToUTF16(buf);
712 } 712 }
713 713
714 // Runs in O(n) time in the length of |str|. 714 // Runs in O(n) time in the length of |str|.
715 template<class StringType> 715 template <class StringType>
716 void DoReplaceSubstringsAfterOffset(StringType* str, 716 void DoReplaceSubstringsAfterOffset(StringType* str,
717 size_t offset, 717 size_t initial_offset,
718 BasicStringPiece<StringType> find_this, 718 BasicStringPiece<StringType> find_this,
719 BasicStringPiece<StringType> replace_with, 719 BasicStringPiece<StringType> replace_with,
720 bool replace_all) { 720 bool replace_all) {
721 DCHECK(!find_this.empty()); 721 DCHECK(!find_this.empty());
722 722
723 // If the find string doesn't appear, there's nothing to do. 723 // If the find string doesn't appear, there's nothing to do.
724 offset = str->find(find_this.data(), offset, find_this.size()); 724 const size_t find_length = find_this.length();
725 if (offset == StringType::npos) 725 size_t first_match = str->find(find_this.data(), initial_offset, find_length);
726 if (first_match == StringType::npos)
726 return; 727 return;
727 728
728 // If we're only replacing one instance, there's no need to do anything 729 // If we're only replacing one instance, there's no need to do anything
729 // complicated. 730 // complicated.
730 size_t find_length = find_this.length(); 731 const size_t replace_length = replace_with.length();
731 if (!replace_all) { 732 if (!replace_all) {
732 str->replace(offset, find_length, replace_with.data(), replace_with.size()); 733 str->replace(first_match, find_length, replace_with.data(), replace_length);
733 return; 734 return;
734 } 735 }
735 736
736 // If the find and replace strings are the same length, we can simply use 737 // If the find and replace strings are the same length, we can simply use
737 // replace() on each instance, and finish the entire operation in O(n) time. 738 // replace() on each instance, and finish the entire operation in O(n) time.
738 size_t replace_length = replace_with.length();
739 if (find_length == replace_length) { 739 if (find_length == replace_length) {
740 do { 740 auto* buffer = &((*str)[0]);
danakj 2017/07/28 22:00:28 does str->data() not do what you want here?
Peter Kasting 2017/07/29 02:18:01 data() returns pointer-to-const until C++17, unfor
ncarter (slow) 2017/07/31 18:55:39 Yes, unfortunately.
741 str->replace(offset, find_length, 741 for (size_t offset = first_match; offset != StringType::npos;
742 replace_with.data(), replace_with.size()); 742 offset = str->find(find_this.data(), offset + replace_length,
743 offset = str->find(find_this.data(), offset + replace_length, 743 find_length)) {
744 find_this.size()); 744 StringType::traits_type::copy(buffer + offset, replace_with.data(),
Peter Kasting 2017/07/28 08:29:24 Nit: Wonder if something like either "using traits
ncarter (slow) 2017/07/28 21:46:34 Great suggestion. Done, with CharTraits as the nam
745 } while (offset != StringType::npos); 745 replace_length);
746 }
746 return; 747 return;
747 } 748 }
748 749
749 // Since the find and replace strings aren't the same length, a loop like the 750 // Since the find and replace strings aren't the same length, a loop like the
750 // one above would be O(n^2) in the worst case, as replace() will shift the 751 // one above would be O(n^2) in the worst case, as replace() will shift the
751 // entire remaining string each time. We need to be more clever to keep 752 // entire remaining string each time. We need to be more clever to keep things
752 // things O(n). 753 // O(n).
753 // 754 //
754 // If we're shortening the string, we can alternate replacements with shifting 755 // When the string is being shortened, it's possible to just shift the matches
755 // forward the intervening characters using memmove(). 756 // down in one pass while finding, and truncate the length at the end of the
757 // search.
758 //
759 // If the string is being lengthened, more work is required. The strategy used
760 // here is to make two find() passes through the string. The first pass counts
761 // the number of matches to determine the new size. The second pass will
762 // either construct the new string into a new buffer (if the existing buffer
763 // lacked capacity), or else -- if there is room -- create a region of scratch
764 // space after |first_match| by shifting the tail of the string to a higher
765 // index, and doing in-place moves from the tail to lower indices thereafter.
756 size_t str_length = str->length(); 766 size_t str_length = str->length();
757 if (find_length > replace_length) { 767 size_t expansion = 0;
758 size_t write_offset = offset; 768 if (replace_length > find_length) {
759 do { 769 // This operation lengthens the string; determine the new length by counting
760 if (replace_length) { 770 // matches.
761 str->replace(write_offset, replace_length, 771 const size_t expansion_per_match = (replace_length - find_length);
762 replace_with.data(), replace_with.size()); 772 size_t num_matches = 0;
763 write_offset += replace_length; 773 for (size_t match = first_match; match != StringType::npos;
774 match =
775 str->find(find_this.data(), match + find_length, find_length)) {
776 expansion += expansion_per_match;
777 ++num_matches;
778 }
779 const size_t final_length = str_length + expansion;
780
781 if (str->capacity() < final_length) {
782 // Since we'd have to realloc the string anyway, use a temporary to build
783 // the result.
784 StringType src;
785 str->swap(src);
786 str->reserve(final_length);
787
788 size_t pos = 0;
789 size_t match = first_match;
790 while (true) {
Peter Kasting 2017/07/28 08:29:24 Nit: Or: for (size_t match = first_match; ;
ncarter (slow) 2017/07/28 21:46:34 Done.
Peter Kasting 2017/07/29 02:18:01 I suggest adding that comment I proposed, or one l
ncarter (slow) 2017/07/31 18:55:39 Done.
791 str->append(src, pos, match - pos);
792 str->append(replace_with.data(), replace_length);
793 pos = match + find_length;
794 if (!--num_matches)
795 break;
796 match = src.find(find_this.data(), pos, find_length);
764 } 797 }
765 size_t read_offset = offset + find_length; 798
766 offset = std::min( 799 // Handle substring after the final match.
767 str->find(find_this.data(), read_offset, find_this.size()), 800 str->append(src, pos, str_length - pos);
768 str_length); 801 return;
769 size_t length = offset - read_offset; 802 }
770 if (length) { 803
771 memmove(&(*str)[write_offset], &(*str)[read_offset], 804 // Prepare for the copy/move loop below -- expand the string to its final
772 length * sizeof(typename StringType::value_type)); 805 // size by shifting the data after the first match to the end of the resized
773 write_offset += length; 806 // string.
774 } 807 size_t shift_src = first_match + find_length;
775 } while (offset < str_length); 808 size_t shift_dst = shift_src + expansion;
776 str->resize(write_offset); 809
777 return; 810 // Big |expansion| factors (relative to |str_length|) require padding up to
811 // |shift_dst|.
812 if (shift_dst > str_length)
813 str->resize(shift_dst);
814
815 str->replace(shift_dst, str_length - shift_src, *str, shift_src,
816 str_length - shift_src);
817 str_length = final_length;
778 } 818 }
779 819
780 // We're lengthening the string. We can use alternating replacements and 820 // We can alternate replacement and move operations. This won't overwrite the
781 // memmove() calls like above, but we need to precalculate the final string 821 // unsearched region of the string so long as |write_offset| <= |read_offset|;
782 // length and then expand from back-to-front to avoid overwriting the string 822 // that condition is always satisfied because:
783 // as we're reading it, needing to shift, or having to copy to a second string 823 //
784 // temporarily. 824 // (a) If the string is being shortened, |expansion| is zero and
785 size_t first_match = offset; 825 // |write_offset| grows slower than |read_offset|.
826 //
827 // (b) If the string is being lengthened, |write_offset| grows faster than
828 // |read_offset|, but |expansion| is big enough so that |write_offset|
829 // will only catch up to |read_offset| at the point of the last match.
830 auto* buffer = &((*str)[0]);
danakj 2017/07/28 22:00:28 also data()?
831 size_t write_offset = first_match;
832 size_t read_offset = first_match + expansion;
833 do {
834 if (replace_length) {
835 StringType::traits_type::copy(buffer + write_offset, replace_with.data(),
836 replace_length);
837 write_offset += replace_length;
838 }
839 read_offset += find_length;
840 size_t match = std::min(
danakj 2017/07/28 22:00:28 this min() call reads very dubiously, as npos is -
Peter Kasting 2017/07/29 02:18:01 FWIW, I've always thought of npos as SIZE_MAX, not
ncarter (slow) 2017/07/31 18:55:39 I added a comment, since it confused me at first t
841 str->find(find_this.data(), read_offset, find_length), str_length);
842 size_t length = match - read_offset;
843 if (length) {
844 StringType::traits_type::move(buffer + write_offset, buffer + read_offset,
845 length);
846 write_offset += length;
847 read_offset += length;
848 }
849 } while (read_offset < str_length);
786 850
787 // First, calculate the final length and resize the string. 851 // If we're shortening the string, truncate it now.
788 size_t final_length = str_length; 852 str->resize(write_offset);
789 size_t expansion = replace_length - find_length;
790 size_t current_match;
791 do {
792 final_length += expansion;
793 // Minor optimization: save this offset into |current_match|, so that on
794 // exit from the loop, |current_match| will point at the last instance of
795 // the find string, and we won't need to find() it again immediately.
796 current_match = offset;
797 offset = str->find(find_this.data(), offset + find_length,
798 find_this.size());
799 } while (offset != StringType::npos);
800 str->resize(final_length);
801
802 // Now do the replacement loop, working backwards through the string.
803 for (size_t prev_match = str_length, write_offset = final_length; ;
804 current_match = str->rfind(find_this.data(), current_match - 1,
805 find_this.size())) {
806 size_t read_offset = current_match + find_length;
807 size_t length = prev_match - read_offset;
808 if (length) {
809 write_offset -= length;
810 memmove(&(*str)[write_offset], &(*str)[read_offset],
811 length * sizeof(typename StringType::value_type));
812 }
813 write_offset -= replace_length;
814 str->replace(write_offset, replace_length,
815 replace_with.data(), replace_with.size());
816 if (current_match == first_match)
817 return;
818 prev_match = current_match;
819 }
820 } 853 }
821 854
822 void ReplaceFirstSubstringAfterOffset(string16* str, 855 void ReplaceFirstSubstringAfterOffset(string16* str,
823 size_t start_offset, 856 size_t start_offset,
824 StringPiece16 find_this, 857 StringPiece16 find_this,
825 StringPiece16 replace_with) { 858 StringPiece16 replace_with) {
826 DoReplaceSubstringsAfterOffset<string16>( 859 DoReplaceSubstringsAfterOffset<string16>(
827 str, start_offset, find_this, replace_with, false); // Replace first. 860 str, start_offset, find_this, replace_with, false); // Replace first.
828 } 861 }
829 862
(...skipping 212 matching lines...) Expand 10 before | Expand all | Expand 10 after
1042 } // namespace 1075 } // namespace
1043 1076
1044 size_t strlcpy(char* dst, const char* src, size_t dst_size) { 1077 size_t strlcpy(char* dst, const char* src, size_t dst_size) {
1045 return lcpyT<char>(dst, src, dst_size); 1078 return lcpyT<char>(dst, src, dst_size);
1046 } 1079 }
1047 size_t wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) { 1080 size_t wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) {
1048 return lcpyT<wchar_t>(dst, src, dst_size); 1081 return lcpyT<wchar_t>(dst, src, dst_size);
1049 } 1082 }
1050 1083
1051 } // namespace base 1084 } // namespace base
OLDNEW
« no previous file with comments | « no previous file | base/strings/string_util_unittest.cc » ('j') | base/strings/string_util_unittest.cc » ('J')

Powered by Google App Engine
This is Rietveld 408576698