| OLD | NEW |
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "base/utf_offset_string_conversions.h" | 5 #include "base/utf_offset_string_conversions.h" |
| 6 | 6 |
| 7 #include <algorithm> | 7 #include <algorithm> |
| 8 | 8 |
| 9 #include "base/logging.h" |
| 9 #include "base/memory/scoped_ptr.h" | 10 #include "base/memory/scoped_ptr.h" |
| 10 #include "base/string_piece.h" | 11 #include "base/string_piece.h" |
| 12 #include "base/third_party/icu/icu_utf.h" |
| 11 #include "base/utf_string_conversion_utils.h" | 13 #include "base/utf_string_conversion_utils.h" |
| 12 | 14 |
| 13 using base::PrepareForUTF16Or32Output; | 15 using base::PrepareForUTF16Or32Output; |
| 14 using base::PrepareForUTF8Output; | 16 using base::PrepareForUTF8Output; |
| 15 using base::ReadUnicodeCharacter; | 17 using base::ReadUnicodeCharacter; |
| 16 using base::WriteUnicodeCharacter; | 18 using base::WriteUnicodeCharacter; |
| 17 | 19 |
| 20 namespace { |
| 21 |
| 18 // Converts the given source Unicode character type to the given destination | 22 // Converts the given source Unicode character type to the given destination |
| 19 // Unicode character type as a STL string. The given input buffer and size | 23 // Unicode character type as a STL string. The given input buffer and size |
| 20 // determine the source, and the given output STL string will be replaced by | 24 // determine the source, and the given output STL string will be replaced by |
| 21 // the result. | 25 // the result. |
| 22 template<typename SrcChar, typename DestStdString> | 26 template<typename SrcChar, typename DestStdString> |
| 23 bool ConvertUnicode(const SrcChar* src, | 27 bool ConvertUnicode(const SrcChar* src, |
| 24 size_t src_len, | 28 size_t src_len, |
| 25 DestStdString* output, | 29 DestStdString* output, |
| 26 std::vector<size_t>* offsets_for_adjustment) { | 30 std::vector<size_t>* offsets_for_adjustment) { |
| 27 if (offsets_for_adjustment) { | 31 if (offsets_for_adjustment) { |
| (...skipping 21 matching lines...) Expand all Loading... |
| 49 // character read, not after it (so that incrementing it in the loop | 53 // character read, not after it (so that incrementing it in the loop |
| 50 // increment will place it at the right location), so we need to account | 54 // increment will place it at the right location), so we need to account |
| 51 // for that in determining the amount that was read. | 55 // for that in determining the amount that was read. |
| 52 offset_adjuster.Add(OffsetAdjuster::Adjustment(original_i, | 56 offset_adjuster.Add(OffsetAdjuster::Adjustment(original_i, |
| 53 i - original_i + 1, chars_written)); | 57 i - original_i + 1, chars_written)); |
| 54 } | 58 } |
| 55 } | 59 } |
| 56 return success; | 60 return success; |
| 57 } | 61 } |
| 58 | 62 |
| 63 } // namespace |
| 64 |
| 65 bool IsValidCodePointIndex(const string16& s, size_t index) { |
| 66 return index == 0 || index == s.length() || |
| 67 !(CBU16_IS_TRAIL(s[index]) && CBU16_IS_LEAD(s[index - 1])); |
| 68 } |
| 69 |
| 70 ptrdiff_t Utf16IndexToOffset(const string16& s, size_t base, size_t pos) { |
| 71 DCHECK_LE(base, s.length()); |
| 72 DCHECK_LE(pos, s.length()); |
| 73 ptrdiff_t delta = 0; |
| 74 while (base < pos) |
| 75 delta += IsValidCodePointIndex(s, base++) ? 1 : 0; |
| 76 while (pos < base) |
| 77 delta -= IsValidCodePointIndex(s, pos++) ? 1 : 0; |
| 78 return delta; |
| 79 } |
| 80 |
| 81 size_t Utf16OffsetToIndex(const string16& s, size_t pos, ptrdiff_t offset) { |
| 82 DCHECK_LE(pos, s.length()); |
| 83 while (offset > 0 && pos < s.length()) |
| 84 offset -= IsValidCodePointIndex(s, pos++) ? 1 : 0; |
| 85 while (offset < 0 && pos > 0) |
| 86 offset += IsValidCodePointIndex(s, --pos) ? 1 : 0; |
| 87 // If offset != 0 then we ran off the edge of the string, which shouldn't |
| 88 // happen but is handled anyway for safety. |
| 89 DCHECK_EQ(offset, 0); |
| 90 // Since the second half of a surrogate pair has "length" zero, there is an |
| 91 // ambiguity in the returned position. Resolve it by always returning a valid |
| 92 // index. |
| 93 if (!IsValidCodePointIndex(s, pos)) |
| 94 ++pos; |
| 95 return pos; |
| 96 } |
| 97 |
| 59 bool UTF8ToUTF16AndAdjustOffset(const char* src, | 98 bool UTF8ToUTF16AndAdjustOffset(const char* src, |
| 60 size_t src_len, | 99 size_t src_len, |
| 61 string16* output, | 100 string16* output, |
| 62 size_t* offset_for_adjustment) { | 101 size_t* offset_for_adjustment) { |
| 63 std::vector<size_t> offsets; | 102 std::vector<size_t> offsets; |
| 64 if (offset_for_adjustment) | 103 if (offset_for_adjustment) |
| 65 offsets.push_back(*offset_for_adjustment); | 104 offsets.push_back(*offset_for_adjustment); |
| 66 PrepareForUTF16Or32Output(src, src_len, output); | 105 PrepareForUTF16Or32Output(src, src_len, output); |
| 67 bool ret = ConvertUnicode(src, src_len, output, &offsets); | 106 bool ret = ConvertUnicode(src, src_len, output, &offsets); |
| 68 if (offset_for_adjustment) | 107 if (offset_for_adjustment) |
| (...skipping 89 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 158 if (*offset <= i->original_offset) | 197 if (*offset <= i->original_offset) |
| 159 break; | 198 break; |
| 160 if (*offset < (i->original_offset + i->original_length)) { | 199 if (*offset < (i->original_offset + i->original_length)) { |
| 161 *offset = string16::npos; | 200 *offset = string16::npos; |
| 162 return; | 201 return; |
| 163 } | 202 } |
| 164 adjustment += (i->original_length - i->output_length); | 203 adjustment += (i->original_length - i->output_length); |
| 165 } | 204 } |
| 166 *offset -= adjustment; | 205 *offset -= adjustment; |
| 167 } | 206 } |
| OLD | NEW |