Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "base/strings/string_util.h" | 5 #include "base/strings/string_util.h" |
| 6 | 6 |
| 7 #include <ctype.h> | 7 #include <ctype.h> |
| 8 #include <errno.h> | 8 #include <errno.h> |
| 9 #include <math.h> | 9 #include <math.h> |
| 10 #include <stdarg.h> | 10 #include <stdarg.h> |
| (...skipping 694 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 705 kByteStringsUnlocalized[dimension]); | 705 kByteStringsUnlocalized[dimension]); |
| 706 } else { | 706 } else { |
| 707 base::snprintf(buf, arraysize(buf), "%.0lf%s", unit_amount, | 707 base::snprintf(buf, arraysize(buf), "%.0lf%s", unit_amount, |
| 708 kByteStringsUnlocalized[dimension]); | 708 kByteStringsUnlocalized[dimension]); |
| 709 } | 709 } |
| 710 | 710 |
| 711 return ASCIIToUTF16(buf); | 711 return ASCIIToUTF16(buf); |
| 712 } | 712 } |
| 713 | 713 |
| 714 // Runs in O(n) time in the length of |str|. | 714 // Runs in O(n) time in the length of |str|. |
| 715 template<class StringType> | 715 template <class StringType> |
| 716 void DoReplaceSubstringsAfterOffset(StringType* str, | 716 void DoReplaceSubstringsAfterOffset(StringType* str, |
| 717 size_t offset, | 717 size_t initial_offset, |
| 718 BasicStringPiece<StringType> find_this, | 718 BasicStringPiece<StringType> find_this, |
| 719 BasicStringPiece<StringType> replace_with, | 719 BasicStringPiece<StringType> replace_with, |
| 720 bool replace_all) { | 720 bool replace_all) { |
| 721 DCHECK(!find_this.empty()); | 721 DCHECK(!find_this.empty()); |
| 722 | 722 |
| 723 // If the find string doesn't appear, there's nothing to do. | 723 // If the find string doesn't appear, there's nothing to do. |
| 724 offset = str->find(find_this.data(), offset, find_this.size()); | 724 const size_t find_length = find_this.length(); |
| 725 if (offset == StringType::npos) | 725 size_t first_match = str->find(find_this.data(), initial_offset, find_length); |
| 726 if (first_match == StringType::npos) | |
| 726 return; | 727 return; |
| 727 | 728 |
| 728 // If we're only replacing one instance, there's no need to do anything | 729 // If we're only replacing one instance, there's no need to do anything |
| 729 // complicated. | 730 // complicated. |
| 730 size_t find_length = find_this.length(); | 731 const size_t replace_length = replace_with.length(); |
| 731 if (!replace_all) { | 732 if (!replace_all) { |
| 732 str->replace(offset, find_length, replace_with.data(), replace_with.size()); | 733 str->replace(first_match, find_length, replace_with.data(), replace_length); |
| 733 return; | 734 return; |
| 734 } | 735 } |
| 735 | 736 |
| 736 // If the find and replace strings are the same length, we can simply use | 737 // If the find and replace strings are the same length, we can simply use |
| 737 // replace() on each instance, and finish the entire operation in O(n) time. | 738 // replace() on each instance, and finish the entire operation in O(n) time. |
| 738 size_t replace_length = replace_with.length(); | |
| 739 if (find_length == replace_length) { | 739 if (find_length == replace_length) { |
| 740 do { | 740 auto* buffer = &((*str)[0]); |
|
danakj
2017/07/28 22:00:28
does str->data() not do what you want here?
Peter Kasting
2017/07/29 02:18:01
data() returns pointer-to-const until C++17, unfor
ncarter (slow)
2017/07/31 18:55:39
Yes, unfortunately.
| |
| 741 str->replace(offset, find_length, | 741 for (size_t offset = first_match; offset != StringType::npos; |
| 742 replace_with.data(), replace_with.size()); | 742 offset = str->find(find_this.data(), offset + replace_length, |
| 743 offset = str->find(find_this.data(), offset + replace_length, | 743 find_length)) { |
| 744 find_this.size()); | 744 StringType::traits_type::copy(buffer + offset, replace_with.data(), |
|
Peter Kasting
2017/07/28 08:29:24
Nit: Wonder if something like either "using traits
ncarter (slow)
2017/07/28 21:46:34
Great suggestion. Done, with CharTraits as the nam
| |
| 745 } while (offset != StringType::npos); | 745 replace_length); |
| 746 } | |
| 746 return; | 747 return; |
| 747 } | 748 } |
| 748 | 749 |
| 749 // Since the find and replace strings aren't the same length, a loop like the | 750 // Since the find and replace strings aren't the same length, a loop like the |
| 750 // one above would be O(n^2) in the worst case, as replace() will shift the | 751 // one above would be O(n^2) in the worst case, as replace() will shift the |
| 751 // entire remaining string each time. We need to be more clever to keep | 752 // entire remaining string each time. We need to be more clever to keep things |
| 752 // things O(n). | 753 // O(n). |
| 753 // | 754 // |
| 754 // If we're shortening the string, we can alternate replacements with shifting | 755 // When the string is being shortened, it's possible to just shift the matches |
| 755 // forward the intervening characters using memmove(). | 756 // down in one pass while finding, and truncate the length at the end of the |
| 757 // search. | |
| 758 // | |
| 759 // If the string is being lengthened, more work is required. The strategy used | |
| 760 // here is to make two find() passes through the string. The first pass counts | |
| 761 // the number of matches to determine the new size. The second pass will | |
| 762 // either construct the new string into a new buffer (if the existing buffer | |
| 763 // lacked capacity), or else -- if there is room -- create a region of scratch | |
| 764 // space after |first_match| by shifting the tail of the string to a higher | |
| 765 // index, and doing in-place moves from the tail to lower indices thereafter. | |
| 756 size_t str_length = str->length(); | 766 size_t str_length = str->length(); |
| 757 if (find_length > replace_length) { | 767 size_t expansion = 0; |
| 758 size_t write_offset = offset; | 768 if (replace_length > find_length) { |
| 759 do { | 769 // This operation lengthens the string; determine the new length by counting |
| 760 if (replace_length) { | 770 // matches. |
| 761 str->replace(write_offset, replace_length, | 771 const size_t expansion_per_match = (replace_length - find_length); |
| 762 replace_with.data(), replace_with.size()); | 772 size_t num_matches = 0; |
| 763 write_offset += replace_length; | 773 for (size_t match = first_match; match != StringType::npos; |
| 774 match = | |
| 775 str->find(find_this.data(), match + find_length, find_length)) { | |
| 776 expansion += expansion_per_match; | |
| 777 ++num_matches; | |
| 778 } | |
| 779 const size_t final_length = str_length + expansion; | |
| 780 | |
| 781 if (str->capacity() < final_length) { | |
| 782 // Since we'd have to realloc the string anyway, use a temporary to build | |
| 783 // the result. | |
| 784 StringType src; | |
| 785 str->swap(src); | |
| 786 str->reserve(final_length); | |
| 787 | |
| 788 size_t pos = 0; | |
| 789 size_t match = first_match; | |
| 790 while (true) { | |
|
Peter Kasting
2017/07/28 08:29:24
Nit: Or:
for (size_t match = first_match; ;
ncarter (slow)
2017/07/28 21:46:34
Done.
Peter Kasting
2017/07/29 02:18:01
I suggest adding that comment I proposed, or one l
ncarter (slow)
2017/07/31 18:55:39
Done.
| |
| 791 str->append(src, pos, match - pos); | |
| 792 str->append(replace_with.data(), replace_length); | |
| 793 pos = match + find_length; | |
| 794 if (!--num_matches) | |
| 795 break; | |
| 796 match = src.find(find_this.data(), pos, find_length); | |
| 764 } | 797 } |
| 765 size_t read_offset = offset + find_length; | 798 |
| 766 offset = std::min( | 799 // Handle substring after the final match. |
| 767 str->find(find_this.data(), read_offset, find_this.size()), | 800 str->append(src, pos, str_length - pos); |
| 768 str_length); | 801 return; |
| 769 size_t length = offset - read_offset; | 802 } |
| 770 if (length) { | 803 |
| 771 memmove(&(*str)[write_offset], &(*str)[read_offset], | 804 // Prepare for the copy/move loop below -- expand the string to its final |
| 772 length * sizeof(typename StringType::value_type)); | 805 // size by shifting the data after the first match to the end of the resized |
| 773 write_offset += length; | 806 // string. |
| 774 } | 807 size_t shift_src = first_match + find_length; |
| 775 } while (offset < str_length); | 808 size_t shift_dst = shift_src + expansion; |
| 776 str->resize(write_offset); | 809 |
| 777 return; | 810 // Big |expansion| factors (relative to |str_length|) require padding up to |
| 811 // |shift_dst|. | |
| 812 if (shift_dst > str_length) | |
| 813 str->resize(shift_dst); | |
| 814 | |
| 815 str->replace(shift_dst, str_length - shift_src, *str, shift_src, | |
| 816 str_length - shift_src); | |
| 817 str_length = final_length; | |
| 778 } | 818 } |
| 779 | 819 |
| 780 // We're lengthening the string. We can use alternating replacements and | 820 // We can alternate replacement and move operations. This won't overwrite the |
| 781 // memmove() calls like above, but we need to precalculate the final string | 821 // unsearched region of the string so long as |write_offset| <= |read_offset|; |
| 782 // length and then expand from back-to-front to avoid overwriting the string | 822 // that condition is always satisfied because: |
| 783 // as we're reading it, needing to shift, or having to copy to a second string | 823 // |
| 784 // temporarily. | 824 // (a) If the string is being shortened, |expansion| is zero and |
| 785 size_t first_match = offset; | 825 // |write_offset| grows slower than |read_offset|. |
| 826 // | |
| 827 // (b) If the string is being lengthened, |write_offset| grows faster than | |
| 828 // |read_offset|, but |expansion| is big enough so that |write_offset| | |
| 829 // will only catch up to |read_offset| at the point of the last match. | |
| 830 auto* buffer = &((*str)[0]); | |
|
danakj
2017/07/28 22:00:28
also data()?
| |
| 831 size_t write_offset = first_match; | |
| 832 size_t read_offset = first_match + expansion; | |
| 833 do { | |
| 834 if (replace_length) { | |
| 835 StringType::traits_type::copy(buffer + write_offset, replace_with.data(), | |
| 836 replace_length); | |
| 837 write_offset += replace_length; | |
| 838 } | |
| 839 read_offset += find_length; | |
| 840 size_t match = std::min( | |
|
danakj
2017/07/28 22:00:28
this min() call reads very dubiously, as npos is -
Peter Kasting
2017/07/29 02:18:01
FWIW, I've always thought of npos as SIZE_MAX, not
ncarter (slow)
2017/07/31 18:55:39
I added a comment, since it confused me at first t
| |
| 841 str->find(find_this.data(), read_offset, find_length), str_length); | |
| 842 size_t length = match - read_offset; | |
| 843 if (length) { | |
| 844 StringType::traits_type::move(buffer + write_offset, buffer + read_offset, | |
| 845 length); | |
| 846 write_offset += length; | |
| 847 read_offset += length; | |
| 848 } | |
| 849 } while (read_offset < str_length); | |
| 786 | 850 |
| 787 // First, calculate the final length and resize the string. | 851 // If we're shortening the string, truncate it now. |
| 788 size_t final_length = str_length; | 852 str->resize(write_offset); |
| 789 size_t expansion = replace_length - find_length; | |
| 790 size_t current_match; | |
| 791 do { | |
| 792 final_length += expansion; | |
| 793 // Minor optimization: save this offset into |current_match|, so that on | |
| 794 // exit from the loop, |current_match| will point at the last instance of | |
| 795 // the find string, and we won't need to find() it again immediately. | |
| 796 current_match = offset; | |
| 797 offset = str->find(find_this.data(), offset + find_length, | |
| 798 find_this.size()); | |
| 799 } while (offset != StringType::npos); | |
| 800 str->resize(final_length); | |
| 801 | |
| 802 // Now do the replacement loop, working backwards through the string. | |
| 803 for (size_t prev_match = str_length, write_offset = final_length; ; | |
| 804 current_match = str->rfind(find_this.data(), current_match - 1, | |
| 805 find_this.size())) { | |
| 806 size_t read_offset = current_match + find_length; | |
| 807 size_t length = prev_match - read_offset; | |
| 808 if (length) { | |
| 809 write_offset -= length; | |
| 810 memmove(&(*str)[write_offset], &(*str)[read_offset], | |
| 811 length * sizeof(typename StringType::value_type)); | |
| 812 } | |
| 813 write_offset -= replace_length; | |
| 814 str->replace(write_offset, replace_length, | |
| 815 replace_with.data(), replace_with.size()); | |
| 816 if (current_match == first_match) | |
| 817 return; | |
| 818 prev_match = current_match; | |
| 819 } | |
| 820 } | 853 } |
| 821 | 854 |
| 822 void ReplaceFirstSubstringAfterOffset(string16* str, | 855 void ReplaceFirstSubstringAfterOffset(string16* str, |
| 823 size_t start_offset, | 856 size_t start_offset, |
| 824 StringPiece16 find_this, | 857 StringPiece16 find_this, |
| 825 StringPiece16 replace_with) { | 858 StringPiece16 replace_with) { |
| 826 DoReplaceSubstringsAfterOffset<string16>( | 859 DoReplaceSubstringsAfterOffset<string16>( |
| 827 str, start_offset, find_this, replace_with, false); // Replace first. | 860 str, start_offset, find_this, replace_with, false); // Replace first. |
| 828 } | 861 } |
| 829 | 862 |
| (...skipping 212 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1042 } // namespace | 1075 } // namespace |
| 1043 | 1076 |
| 1044 size_t strlcpy(char* dst, const char* src, size_t dst_size) { | 1077 size_t strlcpy(char* dst, const char* src, size_t dst_size) { |
| 1045 return lcpyT<char>(dst, src, dst_size); | 1078 return lcpyT<char>(dst, src, dst_size); |
| 1046 } | 1079 } |
| 1047 size_t wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) { | 1080 size_t wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) { |
| 1048 return lcpyT<wchar_t>(dst, src, dst_size); | 1081 return lcpyT<wchar_t>(dst, src, dst_size); |
| 1049 } | 1082 } |
| 1050 | 1083 |
| 1051 } // namespace base | 1084 } // namespace base |
| OLD | NEW |