OLD | NEW |
---|---|
1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "base/strings/string_util.h" | 5 #include "base/strings/string_util.h" |
6 | 6 |
7 #include <ctype.h> | 7 #include <ctype.h> |
8 #include <errno.h> | 8 #include <errno.h> |
9 #include <math.h> | 9 #include <math.h> |
10 #include <stdarg.h> | 10 #include <stdarg.h> |
(...skipping 694 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
705 kByteStringsUnlocalized[dimension]); | 705 kByteStringsUnlocalized[dimension]); |
706 } else { | 706 } else { |
707 base::snprintf(buf, arraysize(buf), "%.0lf%s", unit_amount, | 707 base::snprintf(buf, arraysize(buf), "%.0lf%s", unit_amount, |
708 kByteStringsUnlocalized[dimension]); | 708 kByteStringsUnlocalized[dimension]); |
709 } | 709 } |
710 | 710 |
711 return ASCIIToUTF16(buf); | 711 return ASCIIToUTF16(buf); |
712 } | 712 } |
713 | 713 |
714 // Runs in O(n) time in the length of |str|. | 714 // Runs in O(n) time in the length of |str|. |
715 template<class StringType> | 715 template <class StringType> |
716 void DoReplaceSubstringsAfterOffset(StringType* str, | 716 void DoReplaceSubstringsAfterOffset(StringType* str, |
717 size_t offset, | 717 size_t initial_offset, |
718 BasicStringPiece<StringType> find_this, | 718 BasicStringPiece<StringType> find_this, |
719 BasicStringPiece<StringType> replace_with, | 719 BasicStringPiece<StringType> replace_with, |
720 bool replace_all) { | 720 bool replace_all) { |
721 DCHECK(!find_this.empty()); | 721 DCHECK(!find_this.empty()); |
722 | 722 |
723 // If the find string doesn't appear, there's nothing to do. | 723 // If the find string doesn't appear, there's nothing to do. |
724 offset = str->find(find_this.data(), offset, find_this.size()); | 724 size_t first_match = |
725 if (offset == StringType::npos) | 725 str->find(find_this.data(), initial_offset, find_this.length()); |
726 if (first_match == StringType::npos) | |
726 return; | 727 return; |
727 | 728 |
728 // If we're only replacing one instance, there's no need to do anything | 729 // If we're only replacing one instance, there's no need to do anything |
729 // complicated. | 730 // complicated. |
730 size_t find_length = find_this.length(); | 731 const size_t find_length = find_this.length(); |
732 const size_t replace_length = replace_with.length(); | |
731 if (!replace_all) { | 733 if (!replace_all) { |
732 str->replace(offset, find_length, replace_with.data(), replace_with.size()); | 734 str->replace(first_match, find_length, replace_with.data(), replace_length); |
733 return; | 735 return; |
734 } | 736 } |
735 | 737 |
736 // If the find and replace strings are the same length, we can simply use | 738 // If the find and replace strings are the same length, we can simply use |
737 // replace() on each instance, and finish the entire operation in O(n) time. | 739 // replace() on each instance, and finish the entire operation in O(n) time. |
738 size_t replace_length = replace_with.length(); | |
739 if (find_length == replace_length) { | 740 if (find_length == replace_length) { |
741 size_t offset = first_match; | |
Peter Kasting
2017/07/27 01:09:57
Nit: It does one unnecessary comparison at the sta
ncarter (slow)
2017/07/28 02:34:25
Done.
FWIW it looks like MSVC manages to omit the
| |
740 do { | 742 do { |
741 str->replace(offset, find_length, | 743 str->replace(offset, find_length, replace_with.data(), replace_length); |
742 replace_with.data(), replace_with.size()); | |
743 offset = str->find(find_this.data(), offset + replace_length, | 744 offset = str->find(find_this.data(), offset + replace_length, |
744 find_this.size()); | 745 find_this.length()); |
danakj
2017/07/27 15:55:30
nit: this could be written find_length?
ncarter (slow)
2017/07/28 02:34:25
Done.
| |
745 } while (offset != StringType::npos); | 746 } while (offset != StringType::npos); |
746 return; | 747 return; |
747 } | 748 } |
748 | 749 |
749 // Since the find and replace strings aren't the same length, a loop like the | 750 // Since the find and replace strings aren't the same length, a loop like the |
750 // one above would be O(n^2) in the worst case, as replace() will shift the | 751 // one above would be O(n^2) in the worst case, as replace() will shift the |
751 // entire remaining string each time. We need to be more clever to keep | 752 // entire remaining string each time. We need to be more clever to keep things |
752 // things O(n). | 753 // O(n). |
Peter Kasting
2017/07/27 05:25:14
Nit: Somewhere, we need comments that say what the
ncarter (slow)
2017/07/28 02:34:25
Done.
| |
753 // | |
754 // If we're shortening the string, we can alternate replacements with shifting | |
755 // forward the intervening characters using memmove(). | |
756 size_t str_length = str->length(); | 754 size_t str_length = str->length(); |
757 if (find_length > replace_length) { | 755 size_t expansion = 0; |
758 size_t write_offset = offset; | 756 if (replace_length > find_length) { |
759 do { | 757 // This operation lengthens the string; determine the new length by counting |
760 if (replace_length) { | 758 // matches. |
761 str->replace(write_offset, replace_length, | 759 size_t num_matches = 0; |
762 replace_with.data(), replace_with.size()); | 760 for (size_t match = first_match; match != StringType::npos; |
danakj
2017/07/27 15:55:30
we should be consistent and either for() always li
ncarter (slow)
2017/07/28 02:34:24
Switched to a for loop above, based on the convers
| |
763 write_offset += replace_length; | 761 match = str->find(find_this.data(), match + find_length, |
762 find_this.length())) { | |
danakj
2017/07/27 15:55:30
nit: find_length?
ncarter (slow)
2017/07/28 02:34:25
Done.
| |
763 num_matches++; | |
Peter Kasting
2017/07/27 05:25:14
Nit: Totally personal style, but postincrements re
ncarter (slow)
2017/07/28 02:34:24
Done.
| |
764 } | |
765 expansion = (replace_length - find_length) * num_matches; | |
danakj
2017/07/27 15:55:30
this is nitty, but as per peter's comment below, i
ncarter (slow)
2017/07/28 02:34:24
I've eliminated the imul instruction, by accumulat
| |
766 const size_t final_length = str_length + expansion; | |
767 | |
768 if (str->capacity() < final_length) { | |
ncarter (slow)
2017/07/26 23:50:14
Using a temporary seems to result in fewer copies
Peter Kasting
2017/07/27 01:09:57
I tried to optimize the original code to do as few
danakj
2017/07/27 15:55:30
I'd like to understand that we really want to use
danakj
2017/07/27 15:58:48
Oops, I meant memmove() here as it's written expli
Peter Kasting
2017/07/27 22:09:47
I don't think memmove() usually copies to a temp b
Peter Kasting
2017/07/27 22:11:21
More confirmation of this: see the answer (and com
ncarter (slow)
2017/07/28 02:34:25
I agree with danakj's intuition that a temp seems
Peter Kasting
2017/07/28 08:29:24
By 1, right? Because we replace "copy to back" wi
danakj
2017/07/28 15:53:48
In my profiling work in chromium, code has been co
ncarter (slow)
2017/07/28 21:46:34
No, by a difference of 2. In the worse case the wh
Peter Kasting
2017/07/29 02:18:01
To summarize your explanation: if we have to grow
ncarter (slow)
2017/07/31 18:55:39
Agree.
FWIW, I figured out the algorithm for the
| |
769 // Since we'd have to realloc the string anyway, use a temporary to build | |
770 // the result. | |
771 StringType src; | |
772 str->swap(src); | |
773 str->reserve(final_length); | |
ncarter (slow)
2017/07/26 23:50:14
Should we worry that using a temporary here will b
Peter Kasting
2017/07/27 01:09:57
See reply above.
ncarter (slow)
2017/07/28 02:34:25
Acknowledged.
FWIW this article is what was on my
| |
774 | |
775 size_t pos = 0; | |
776 for (size_t i = 0; i < num_matches; ++i) { | |
Peter Kasting
2017/07/27 05:25:14
Nit: I feel like we ought to be able to write this
ncarter (slow)
2017/07/28 02:34:24
Writing it as you suggest means there's an extra c
| |
777 size_t match = | |
778 (i == 0) ? first_match | |
779 : src.find(find_this.data(), pos, find_this.length()); | |
danakj
2017/07/27 15:55:30
nit: find_length?
ncarter (slow)
2017/07/28 02:34:25
Done.
| |
780 str->append(src, pos, match - pos); | |
781 str->append(replace_with.data(), replace_with.length()); | |
danakj
2017/07/27 15:55:30
nit: replace_length?
ncarter (slow)
2017/07/28 02:34:25
Done.
| |
782 pos = match + find_length; | |
764 } | 783 } |
765 size_t read_offset = offset + find_length; | 784 |
766 offset = std::min( | 785 // Handle substring after the final match. |
767 str->find(find_this.data(), read_offset, find_this.size()), | 786 str->append(src, pos, str_length - pos); |
768 str_length); | 787 return; |
769 size_t length = offset - read_offset; | 788 } |
770 if (length) { | 789 |
771 memmove(&(*str)[write_offset], &(*str)[read_offset], | 790 // Prepare for the memmove loop below -- expand the string to its final size |
772 length * sizeof(typename StringType::value_type)); | 791 // by shifting the data after the first match to the end of the resized |
773 write_offset += length; | 792 // string. |
774 } | 793 size_t shift_src = first_match + find_length; |
775 } while (offset < str_length); | 794 size_t shift_dst = shift_src + expansion; |
776 str->resize(write_offset); | 795 |
777 return; | 796 // Big |expansion| factors (relative to |str_length|) require padding up to |
797 // |shift_dst|. | |
798 if (shift_dst > str_length) | |
799 str->resize(shift_dst); | |
800 | |
801 str->replace(shift_dst, str_length - shift_src, *str, shift_src, | |
802 str_length - shift_src); | |
Peter Kasting
2017/07/27 05:25:14
Nit: We should probably be consistent about whethe
ncarter (slow)
2017/07/28 02:34:25
This particular replace can't be replaced with mem
| |
803 str_length = final_length; | |
778 } | 804 } |
779 | 805 |
780 // We're lengthening the string. We can use alternating replacements and | 806 // We can alternate replacements with memmove. This won't overwrite the source |
781 // memmove() calls like above, but we need to precalculate the final string | 807 // region so long as |write_offset| <= |read_offset|; that is guaranteed |
782 // length and then expand from back-to-front to avoid overwriting the string | 808 // because: |
783 // as we're reading it, needing to shift, or having to copy to a second string | 809 // |
Peter Kasting
2017/07/27 01:09:57
This "needing to shift" is, AFAICT, exactly what y
Peter Kasting
2017/07/27 05:25:14
That said, the only way I can think of to do this
ncarter (slow)
2017/07/28 02:34:25
I'd thought about this too. The best I came up wit
| |
784 // temporarily. | 810 // (a) If the string is being shortened, |expansion| is zero and |
785 size_t first_match = offset; | 811 // |write_offset| grows slower than |read_offset|. |
786 | 812 // |
787 // First, calculate the final length and resize the string. | 813 // (b) If the string is being lengthened, |write_offset| grows faster than |
788 size_t final_length = str_length; | 814 // |read_offset|, but |expansion| is big enough so that |write_offset| |
789 size_t expansion = replace_length - find_length; | 815 // will only catch up to |read_offset| at the point of the last match. |
790 size_t current_match; | 816 size_t write_offset = first_match; |
817 size_t read_offset = first_match + expansion; | |
791 do { | 818 do { |
ncarter (slow)
2017/07/26 23:50:14
Note that this is essentially the old 'string is s
ncarter (slow)
2017/07/28 02:34:25
Done.
| |
792 final_length += expansion; | 819 if (replace_length) { |
793 // Minor optimization: save this offset into |current_match|, so that on | 820 str->replace(write_offset, replace_length, replace_with.data(), |
794 // exit from the loop, |current_match| will point at the last instance of | 821 replace_length); |
795 // the find string, and we won't need to find() it again immediately. | 822 write_offset += replace_length; |
796 current_match = offset; | 823 } |
797 offset = str->find(find_this.data(), offset + find_length, | 824 read_offset += find_length; |
798 find_this.size()); | 825 size_t match = |
799 } while (offset != StringType::npos); | 826 std::min(str->find(find_this.data(), read_offset, find_this.length()), |
800 str->resize(final_length); | 827 str_length); |
801 | 828 size_t length = match - read_offset; |
802 // Now do the replacement loop, working backwards through the string. | |
803 for (size_t prev_match = str_length, write_offset = final_length; ; | |
804 current_match = str->rfind(find_this.data(), current_match - 1, | |
805 find_this.size())) { | |
806 size_t read_offset = current_match + find_length; | |
807 size_t length = prev_match - read_offset; | |
808 if (length) { | 829 if (length) { |
809 write_offset -= length; | |
810 memmove(&(*str)[write_offset], &(*str)[read_offset], | 830 memmove(&(*str)[write_offset], &(*str)[read_offset], |
ncarter (slow)
2017/07/26 23:50:14
Any reason we preferred memmove over string::repla
Peter Kasting
2017/07/27 01:09:57
I don't recall having one, just "I know I'm moving
ncarter (slow)
2017/07/28 02:34:25
I've switched to StringType::traits_type::move/cop
Peter Kasting
2017/07/28 08:29:24
It's more readable too. +1.
| |
811 length * sizeof(typename StringType::value_type)); | 831 length * sizeof(typename StringType::value_type)); |
832 write_offset += length; | |
833 read_offset += length; | |
812 } | 834 } |
813 write_offset -= replace_length; | 835 } while (read_offset < str_length); |
814 str->replace(write_offset, replace_length, | 836 |
815 replace_with.data(), replace_with.size()); | 837 // If we're shortening the string, truncate it now. |
816 if (current_match == first_match) | 838 str->resize(write_offset); |
817 return; | 839 |
818 prev_match = current_match; | 840 return; |
Peter Kasting
2017/07/27 05:25:14
Nit: Trailing return unnecessary
ncarter (slow)
2017/07/28 02:34:24
Done.
| |
819 } | |
820 } | 841 } |
821 | 842 |
822 void ReplaceFirstSubstringAfterOffset(string16* str, | 843 void ReplaceFirstSubstringAfterOffset(string16* str, |
823 size_t start_offset, | 844 size_t start_offset, |
824 StringPiece16 find_this, | 845 StringPiece16 find_this, |
825 StringPiece16 replace_with) { | 846 StringPiece16 replace_with) { |
826 DoReplaceSubstringsAfterOffset<string16>( | 847 DoReplaceSubstringsAfterOffset<string16>( |
827 str, start_offset, find_this, replace_with, false); // Replace first. | 848 str, start_offset, find_this, replace_with, false); // Replace first. |
828 } | 849 } |
829 | 850 |
(...skipping 212 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1042 } // namespace | 1063 } // namespace |
1043 | 1064 |
1044 size_t strlcpy(char* dst, const char* src, size_t dst_size) { | 1065 size_t strlcpy(char* dst, const char* src, size_t dst_size) { |
1045 return lcpyT<char>(dst, src, dst_size); | 1066 return lcpyT<char>(dst, src, dst_size); |
1046 } | 1067 } |
1047 size_t wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) { | 1068 size_t wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) { |
1048 return lcpyT<wchar_t>(dst, src, dst_size); | 1069 return lcpyT<wchar_t>(dst, src, dst_size); |
1049 } | 1070 } |
1050 | 1071 |
1051 } // namespace base | 1072 } // namespace base |
OLD | NEW |