OLD | NEW |
---|---|
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "base/utf_offset_string_conversions.h" | 5 #include "base/utf_offset_string_conversions.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 | 8 |
9 #include "base/logging.h" | |
9 #include "base/memory/scoped_ptr.h" | 10 #include "base/memory/scoped_ptr.h" |
10 #include "base/string_piece.h" | 11 #include "base/string_piece.h" |
12 #include "base/third_party/icu/icu_utf.h" | |
11 #include "base/utf_string_conversion_utils.h" | 13 #include "base/utf_string_conversion_utils.h" |
12 | 14 |
13 using base::PrepareForUTF16Or32Output; | 15 using base::PrepareForUTF16Or32Output; |
14 using base::PrepareForUTF8Output; | 16 using base::PrepareForUTF8Output; |
15 using base::ReadUnicodeCharacter; | 17 using base::ReadUnicodeCharacter; |
16 using base::WriteUnicodeCharacter; | 18 using base::WriteUnicodeCharacter; |
17 | 19 |
20 namespace { | |
21 | |
18 // Converts the given source Unicode character type to the given destination | 22 // Converts the given source Unicode character type to the given destination |
19 // Unicode character type as a STL string. The given input buffer and size | 23 // Unicode character type as a STL string. The given input buffer and size |
20 // determine the source, and the given output STL string will be replaced by | 24 // determine the source, and the given output STL string will be replaced by |
21 // the result. | 25 // the result. |
22 template<typename SrcChar, typename DestStdString> | 26 template<typename SrcChar, typename DestStdString> |
23 bool ConvertUnicode(const SrcChar* src, | 27 bool ConvertUnicode(const SrcChar* src, |
24 size_t src_len, | 28 size_t src_len, |
25 DestStdString* output, | 29 DestStdString* output, |
26 std::vector<size_t>* offsets_for_adjustment) { | 30 std::vector<size_t>* offsets_for_adjustment) { |
27 if (offsets_for_adjustment) { | 31 if (offsets_for_adjustment) { |
(...skipping 21 matching lines...) | |
49 // character read, not after it (so that incrementing it in the loop | 53 // character read, not after it (so that incrementing it in the loop |
50 // increment will place it at the right location), so we need to account | 54 // increment will place it at the right location), so we need to account |
51 // for that in determining the amount that was read. | 55 // for that in determining the amount that was read. |
52 offset_adjuster.Add(OffsetAdjuster::Adjustment(original_i, | 56 offset_adjuster.Add(OffsetAdjuster::Adjustment(original_i, |
53 i - original_i + 1, chars_written)); | 57 i - original_i + 1, chars_written)); |
54 } | 58 } |
55 } | 59 } |
56 return success; | 60 return success; |
57 } | 61 } |
58 | 62 |
63 } // namespace | |
64 | |
65 bool IsValidUtf16Index(const string16& s, size_t index) { | |
66 return index == 0 || index == s.length() || | |
67 !(CBU16_IS_TRAIL(s[index]) && CBU16_IS_LEAD(s[index - 1])); | |
68 } | |
69 | |
70 ptrdiff_t Utf16IndexToOffset(const string16& s, size_t from, size_t to) { | |
71 DCHECK_LE(from, s.length()); | |
72 DCHECK_LE(to, s.length()); | |
73 ptrdiff_t delta = 0; | |
74 while (from < to) | |
75 delta += IsValidUtf16Index(s, from++); | |
msw
2012/02/22 00:33:26
This adds a bool? Can you make it a ternary and ex
benrg
2012/02/24 19:07:44
Done.
| |
76 while (to < from) | |
77 delta -= IsValidUtf16Index(s, to++); | |
78 return delta; | |
79 } | |
80 | |
81 size_t Utf16OffsetToIndex(const string16& s, size_t pos, ptrdiff_t offset) { | |
82 DCHECK_LE(pos, s.length()); | |
83 while (offset > 0 && pos < s.length()) | |
84 offset -= IsValidUtf16Index(s, pos++); | |
85 while (offset < 0 && pos > 0) | |
86 offset += IsValidUtf16Index(s, --pos); | |
87 // If offset != 0 then we ran off the edge of the string, which shouldn't | |
88 // happen but is handled anyway for safety. | |
89 DCHECK(offset == 0); | |
msw
2012/02/22 00:33:26
DCHECK_EQ
| |
90 return pos; | |
91 } | |
92 | |
59 bool UTF8ToUTF16AndAdjustOffset(const char* src, | 93 bool UTF8ToUTF16AndAdjustOffset(const char* src, |
60 size_t src_len, | 94 size_t src_len, |
61 string16* output, | 95 string16* output, |
62 size_t* offset_for_adjustment) { | 96 size_t* offset_for_adjustment) { |
63 std::vector<size_t> offsets; | 97 std::vector<size_t> offsets; |
64 if (offset_for_adjustment) | 98 if (offset_for_adjustment) |
65 offsets.push_back(*offset_for_adjustment); | 99 offsets.push_back(*offset_for_adjustment); |
66 PrepareForUTF16Or32Output(src, src_len, output); | 100 PrepareForUTF16Or32Output(src, src_len, output); |
67 bool ret = ConvertUnicode(src, src_len, output, &offsets); | 101 bool ret = ConvertUnicode(src, src_len, output, &offsets); |
68 if (offset_for_adjustment) | 102 if (offset_for_adjustment) |
(...skipping 89 matching lines...) | |
158 if (*offset <= i->original_offset) | 192 if (*offset <= i->original_offset) |
159 break; | 193 break; |
160 if (*offset < (i->original_offset + i->original_length)) { | 194 if (*offset < (i->original_offset + i->original_length)) { |
161 *offset = string16::npos; | 195 *offset = string16::npos; |
162 return; | 196 return; |
163 } | 197 } |
164 adjustment += (i->original_length - i->output_length); | 198 adjustment += (i->original_length - i->output_length); |
165 } | 199 } |
166 *offset -= adjustment; | 200 *offset -= adjustment; |
167 } | 201 } |
OLD | NEW |