OLD | NEW |
---|---|
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "base/utf_offset_string_conversions.h" | 5 #include "base/utf_offset_string_conversions.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 | 8 |
9 #include "base/logging.h" | |
9 #include "base/memory/scoped_ptr.h" | 10 #include "base/memory/scoped_ptr.h" |
10 #include "base/string_piece.h" | 11 #include "base/string_piece.h" |
12 #include "base/third_party/icu/icu_utf.h" | |
11 #include "base/utf_string_conversion_utils.h" | 13 #include "base/utf_string_conversion_utils.h" |
12 | 14 |
13 using base::PrepareForUTF16Or32Output; | 15 using base::PrepareForUTF16Or32Output; |
14 using base::PrepareForUTF8Output; | 16 using base::PrepareForUTF8Output; |
15 using base::ReadUnicodeCharacter; | 17 using base::ReadUnicodeCharacter; |
16 using base::WriteUnicodeCharacter; | 18 using base::WriteUnicodeCharacter; |
17 | 19 |
20 namespace { | |
21 | |
18 // Converts the given source Unicode character type to the given destination | 22 // Converts the given source Unicode character type to the given destination |
19 // Unicode character type as a STL string. The given input buffer and size | 23 // Unicode character type as a STL string. The given input buffer and size |
20 // determine the source, and the given output STL string will be replaced by | 24 // determine the source, and the given output STL string will be replaced by |
21 // the result. | 25 // the result. |
22 template<typename SrcChar, typename DestStdString> | 26 template<typename SrcChar, typename DestStdString> |
23 bool ConvertUnicode(const SrcChar* src, | 27 bool ConvertUnicode(const SrcChar* src, |
24 size_t src_len, | 28 size_t src_len, |
25 DestStdString* output, | 29 DestStdString* output, |
26 std::vector<size_t>* offsets_for_adjustment) { | 30 std::vector<size_t>* offsets_for_adjustment) { |
27 if (offsets_for_adjustment) { | 31 if (offsets_for_adjustment) { |
(...skipping 21 matching lines...) Expand all Loading... | |
49 // character read, not after it (so that incrementing it in the loop | 53 // character read, not after it (so that incrementing it in the loop |
50 // increment will place it at the right location), so we need to account | 54 // increment will place it at the right location), so we need to account |
51 // for that in determining the amount that was read. | 55 // for that in determining the amount that was read. |
52 offset_adjuster.Add(OffsetAdjuster::Adjustment(original_i, | 56 offset_adjuster.Add(OffsetAdjuster::Adjustment(original_i, |
53 i - original_i + 1, chars_written)); | 57 i - original_i + 1, chars_written)); |
54 } | 58 } |
55 } | 59 } |
56 return success; | 60 return success; |
57 } | 61 } |
58 | 62 |
63 } // namespace | |
64 | |
// Returns true if index is a position in s that does not fall between the | |
// two halves of a surrogate pair. Position 0 and position s.length() are | |
// always accepted; any other position is rejected only when s[index] is a | |
// trail unit (CBU16_IS_TRAIL) and s[index - 1] is a lead unit (CBU16_IS_LEAD). | |
// The two boundary tests come first so that s[index - 1] is never evaluated | |
// for index == 0 and s[index] is never evaluated for index == s.length(). | |
// NOTE(review): index > s.length() is not guarded against here; callers are | |
// presumably expected to pass an in-range index - confirm. | |
65 bool IsValidCodePointIndex(const string16& s, size_t index) { | |
66 return index == 0 || index == s.length() || | |
67 !(CBU16_IS_TRAIL(s[index]) && CBU16_IS_LEAD(s[index - 1])); | |
68 } | |
69 | |
// Returns the signed distance from index base to index pos in s, measured in | |
// valid code point indices rather than UTF-16 units. When base < pos the | |
// result is the number of indices in [base, pos) accepted by | |
// IsValidCodePointIndex; when pos < base it is the negated count over | |
// [pos, base); when they are equal it is 0. Both indices are DCHECKed to be | |
// no greater than s.length(). | |
70 ptrdiff_t Utf16IndexToOffset(const string16& s, size_t base, size_t pos) { | |
71 DCHECK_LE(base, s.length()); | |
72 DCHECK_LE(pos, s.length()); | |
73 ptrdiff_t delta = 0; | |
// At most one of the two loops below runs. base and pos are by-value | |
// parameters, so advancing them here does not affect the caller. | |
74 while (base < pos) | |
75 delta += IsValidCodePointIndex(s, base++) ? 1 : 0; | |
76 while (pos < base) | |
77 delta -= IsValidCodePointIndex(s, pos++) ? 1 : 0; | |
78 return delta; | |
79 } | |
80 | |
// Returns the index in s reached by starting at index pos and moving by | |
// offset valid code point indices (forward for a positive offset, backward | |
// for a negative one). pos is DCHECKed to be no greater than s.length(). | |
// Movement stops at either end of the string even if offset has not been | |
// used up; that situation trips the DCHECK_EQ below. The returned index is | |
// always one accepted by IsValidCodePointIndex. | |
81 size_t Utf16OffsetToIndex(const string16& s, size_t pos, ptrdiff_t offset) { | |
82 DCHECK_LE(pos, s.length()); | |
// Forward steps test the index being left (pos++); backward steps test the | |
// index being entered (--pos). In both directions offset changes only when | |
// the tested index is a valid code point index. | |
83 while (offset > 0 && pos < s.length()) | |
84 offset -= IsValidCodePointIndex(s, pos++) ? 1 : 0; | |
85 while (offset < 0 && pos > 0) | |
86 offset += IsValidCodePointIndex(s, --pos) ? 1 : 0; | |
87 // If offset != 0 then we ran off the edge of the string, which shouldn't | |
88 // happen but is handled anyway for safety. | |
89 DCHECK_EQ(offset, 0); | |
90 // Since the second half of a surrogate pair has "length" zero, there is an | |
91 // ambiguity in the returned position. Resolve it by always returning a valid | |
92 // index. | |
93 if (!IsValidCodePointIndex(s, pos)) | |
xji
2012/03/01 19:33:49
this line only reachable when |offset| == 0 at ent
benrg
2012/03/01 21:07:52
No, it's also reachable when offset > 0 and the de
| |
94 ++pos; | |
95 return pos; | |
96 } | |
97 | |
59 bool UTF8ToUTF16AndAdjustOffset(const char* src, | 98 bool UTF8ToUTF16AndAdjustOffset(const char* src, |
60 size_t src_len, | 99 size_t src_len, |
61 string16* output, | 100 string16* output, |
62 size_t* offset_for_adjustment) { | 101 size_t* offset_for_adjustment) { |
63 std::vector<size_t> offsets; | 102 std::vector<size_t> offsets; |
64 if (offset_for_adjustment) | 103 if (offset_for_adjustment) |
65 offsets.push_back(*offset_for_adjustment); | 104 offsets.push_back(*offset_for_adjustment); |
66 PrepareForUTF16Or32Output(src, src_len, output); | 105 PrepareForUTF16Or32Output(src, src_len, output); |
67 bool ret = ConvertUnicode(src, src_len, output, &offsets); | 106 bool ret = ConvertUnicode(src, src_len, output, &offsets); |
68 if (offset_for_adjustment) | 107 if (offset_for_adjustment) |
(...skipping 89 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
158 if (*offset <= i->original_offset) | 197 if (*offset <= i->original_offset) |
159 break; | 198 break; |
160 if (*offset < (i->original_offset + i->original_length)) { | 199 if (*offset < (i->original_offset + i->original_length)) { |
161 *offset = string16::npos; | 200 *offset = string16::npos; |
162 return; | 201 return; |
163 } | 202 } |
164 adjustment += (i->original_length - i->output_length); | 203 adjustment += (i->original_length - i->output_length); |
165 } | 204 } |
166 *offset -= adjustment; | 205 *offset -= adjustment; |
167 } | 206 } |
OLD | NEW |