| OLD | NEW |
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "base/utf_offset_string_conversions.h" | 5 #include "base/utf_offset_string_conversions.h" |
| 6 | 6 |
| 7 #include <algorithm> | 7 #include <algorithm> |
| 8 | 8 |
| 9 #include "base/scoped_ptr.h" | 9 #include "base/scoped_ptr.h" |
| 10 #include "base/string_piece.h" | 10 #include "base/string_piece.h" |
| 11 #include "base/utf_string_conversion_utils.h" | 11 #include "base/utf_string_conversion_utils.h" |
| 12 | 12 |
| 13 using base::PrepareForUTF16Or32Output; | 13 using base::PrepareForUTF16Or32Output; |
| 14 using base::ReadUnicodeCharacter; | 14 using base::ReadUnicodeCharacter; |
| 15 using base::WriteUnicodeCharacter; | 15 using base::WriteUnicodeCharacter; |
| 16 | 16 |
| 17 // Generalized Unicode converter ----------------------------------------------- | |
| 18 | |
| 19 // Converts the given source Unicode character type to the given destination | 17 // Converts the given source Unicode character type to the given destination |
| 20 // Unicode character type as a STL string. The given input buffer and size | 18 // Unicode character type as a STL string. The given input buffer and size |
| 21 // determine the source, and the given output STL string will be replaced by | 19 // determine the source, and the given output STL string will be replaced by |
| 22 // the result. | 20 // the result. |
| 23 template<typename SRC_CHAR> | 21 bool ConvertUnicode(const char* src, |
| 24 bool ConvertUnicode(const SRC_CHAR* src, | |
| 25 size_t src_len, | 22 size_t src_len, |
| 26 std::wstring* output, | 23 string16* output, |
| 27 std::vector<size_t>* offsets_for_adjustment) { | 24 std::vector<size_t>* offsets_for_adjustment) { |
| 28 if (offsets_for_adjustment) { | 25 if (offsets_for_adjustment) { |
| 29 std::for_each(offsets_for_adjustment->begin(), | 26 std::for_each(offsets_for_adjustment->begin(), |
| 30 offsets_for_adjustment->end(), | 27 offsets_for_adjustment->end(), |
| 31 LimitOffset<std::wstring>(src_len)); | 28 LimitOffset<string16>(src_len)); |
| 32 } | 29 } |
| 33 | 30 |
| 34 // ICU requires 32-bit numbers. | 31 // ICU requires 32-bit numbers. |
| 35 bool success = true; | 32 bool success = true; |
| 36 AdjustOffset::Adjustments adjustments; | 33 AdjustOffset::Adjustments adjustments; |
| 37 int32 src_len32 = static_cast<int32>(src_len); | 34 int32 src_len32 = static_cast<int32>(src_len); |
| 38 for (int32 i = 0; i < src_len32; i++) { | 35 for (int32 i = 0; i < src_len32; i++) { |
| 39 uint32 code_point; | 36 uint32 code_point; |
| 40 size_t original_i = i; | 37 size_t original_i = i; |
| 41 size_t chars_written = 0; | 38 size_t chars_written = 0; |
| (...skipping 16 matching lines...) Expand all Loading... |
| 58 // Make offset adjustment. | 55 // Make offset adjustment. |
| 59 if (offsets_for_adjustment && !adjustments.empty()) { | 56 if (offsets_for_adjustment && !adjustments.empty()) { |
| 60 std::for_each(offsets_for_adjustment->begin(), | 57 std::for_each(offsets_for_adjustment->begin(), |
| 61 offsets_for_adjustment->end(), | 58 offsets_for_adjustment->end(), |
| 62 AdjustOffset(adjustments)); | 59 AdjustOffset(adjustments)); |
| 63 } | 60 } |
| 64 | 61 |
| 65 return success; | 62 return success; |
| 66 } | 63 } |
| 67 | 64 |
| 68 // UTF-8 <-> Wide -------------------------------------------------------------- | 65 bool UTF8ToUTF16AndAdjustOffset(const char* src, |
| 69 | 66 size_t src_len, |
| 70 bool UTF8ToWideAndAdjustOffset(const char* src, | 67 string16* output, |
| 71 size_t src_len, | 68 size_t* offset_for_adjustment) { |
| 72 std::wstring* output, | |
| 73 size_t* offset_for_adjustment) { | |
| 74 std::vector<size_t> offsets; | 69 std::vector<size_t> offsets; |
| 75 if (offset_for_adjustment) | 70 if (offset_for_adjustment) |
| 76 offsets.push_back(*offset_for_adjustment); | 71 offsets.push_back(*offset_for_adjustment); |
| 77 PrepareForUTF16Or32Output(src, src_len, output); | 72 PrepareForUTF16Or32Output(src, src_len, output); |
| 78 bool ret = ConvertUnicode(src, src_len, output, &offsets); | 73 bool ret = ConvertUnicode(src, src_len, output, &offsets); |
| 79 if (offset_for_adjustment) | 74 if (offset_for_adjustment) |
| 80 *offset_for_adjustment = offsets[0]; | 75 *offset_for_adjustment = offsets[0]; |
| 81 return ret; | 76 return ret; |
| 82 } | 77 } |
| 83 | 78 |
| 84 bool UTF8ToWideAndAdjustOffsets(const char* src, | 79 bool UTF8ToUTF16AndAdjustOffsets(const char* src, |
| 85 size_t src_len, | 80 size_t src_len, |
| 86 std::wstring* output, | 81 string16* output, |
| 87 std::vector<size_t>* offsets_for_adjustment) { | 82 std::vector<size_t>* offsets_for_adjustment) { |
| 88 PrepareForUTF16Or32Output(src, src_len, output); | 83 PrepareForUTF16Or32Output(src, src_len, output); |
| 89 return ConvertUnicode(src, src_len, output, offsets_for_adjustment); | 84 return ConvertUnicode(src, src_len, output, offsets_for_adjustment); |
| 90 } | 85 } |
| 91 | 86 |
| 92 std::wstring UTF8ToWideAndAdjustOffset(const base::StringPiece& utf8, | 87 string16 UTF8ToUTF16AndAdjustOffset(const base::StringPiece& utf8, |
| 93 size_t* offset_for_adjustment) { | |
| 94 std::vector<size_t> offsets; | |
| 95 if (offset_for_adjustment) | |
| 96 offsets.push_back(*offset_for_adjustment); | |
| 97 std::wstring result; | |
| 98 UTF8ToWideAndAdjustOffsets(utf8.data(), utf8.length(), &result, | |
| 99 &offsets); | |
| 100 if (offset_for_adjustment) | |
| 101 *offset_for_adjustment = offsets[0]; | |
| 102 return result; | |
| 103 } | |
| 104 | |
| 105 std::wstring UTF8ToWideAndAdjustOffsets(const base::StringPiece& utf8, | |
| 106 std::vector<size_t>* | |
| 107 offsets_for_adjustment) { | |
| 108 std::wstring result; | |
| 109 UTF8ToWideAndAdjustOffsets(utf8.data(), utf8.length(), &result, | |
| 110 offsets_for_adjustment); | |
| 111 return result; | |
| 112 } | |
| 113 | |
| 114 // UTF-16 <-> Wide ------------------------------------------------------------- | |
| 115 | |
| 116 #if defined(WCHAR_T_IS_UTF16) | |
| 117 | |
| 118 // When wide == UTF-16, then conversions are a NOP. | |
| 119 bool UTF16ToWideAndAdjustOffset(const char16* src, | |
| 120 size_t src_len, | |
| 121 std::wstring* output, | |
| 122 size_t* offset_for_adjustment) { | |
| 123 output->assign(src, src_len); | |
| 124 if (offset_for_adjustment && (*offset_for_adjustment >= src_len)) | |
| 125 *offset_for_adjustment = std::wstring::npos; | |
| 126 return true; | |
| 127 } | |
| 128 | |
| 129 bool UTF16ToWideAndAdjustOffsets(const char16* src, | |
| 130 size_t src_len, | |
| 131 std::wstring* output, | |
| 132 std::vector<size_t>* offsets_for_adjustment) { | |
| 133 output->assign(src, src_len); | |
| 134 if (offsets_for_adjustment) { | |
| 135 std::for_each(offsets_for_adjustment->begin(), | |
| 136 offsets_for_adjustment->end(), | |
| 137 LimitOffset<std::wstring>(src_len)); | |
| 138 } | |
| 139 return true; | |
| 140 } | |
| 141 | |
| 142 std::wstring UTF16ToWideAndAdjustOffset(const string16& utf16, | |
| 143 size_t* offset_for_adjustment) { | |
| 144 if (offset_for_adjustment && (*offset_for_adjustment >= utf16.length())) | |
| 145 *offset_for_adjustment = std::wstring::npos; | |
| 146 return utf16; | |
| 147 } | |
| 148 | |
| 149 std::wstring UTF16ToWideAndAdjustOffsets( | |
| 150 const string16& utf16, | |
| 151 std::vector<size_t>* offsets_for_adjustment) { | |
| 152 if (offsets_for_adjustment) { | |
| 153 std::for_each(offsets_for_adjustment->begin(), | |
| 154 offsets_for_adjustment->end(), | |
| 155 LimitOffset<std::wstring>(utf16.length())); | |
| 156 } | |
| 157 return utf16; | |
| 158 } | |
| 159 | |
| 160 #elif defined(WCHAR_T_IS_UTF32) | |
| 161 | |
| 162 bool UTF16ToWideAndAdjustOffset(const char16* src, | |
| 163 size_t src_len, | |
| 164 std::wstring* output, | |
| 165 size_t* offset_for_adjustment) { | |
| 166 std::vector<size_t> offsets; | |
| 167 if (offset_for_adjustment) | |
| 168 offsets.push_back(*offset_for_adjustment); | |
| 169 output->clear(); | |
| 170 // Assume that normally we won't have any non-BMP characters so the counts | |
| 171 // will be the same. | |
| 172 output->reserve(src_len); | |
| 173 bool ret = ConvertUnicode(src, src_len, output, &offsets); | |
| 174 if (offset_for_adjustment) | |
| 175 *offset_for_adjustment = offsets[0]; | |
| 176 return ret; | |
| 177 } | |
| 178 | |
| 179 bool UTF16ToWideAndAdjustOffsets(const char16* src, | |
| 180 size_t src_len, | |
| 181 std::wstring* output, | |
| 182 std::vector<size_t>* offsets_for_adjustment) { | |
| 183 output->clear(); | |
| 184 // Assume that normally we won't have any non-BMP characters so the counts | |
| 185 // will be the same. | |
| 186 output->reserve(src_len); | |
| 187 return ConvertUnicode(src, src_len, output, offsets_for_adjustment); | |
| 188 } | |
| 189 | |
| 190 std::wstring UTF16ToWideAndAdjustOffset(const string16& utf16, | |
| 191 size_t* offset_for_adjustment) { | 88 size_t* offset_for_adjustment) { |
| 192 std::vector<size_t> offsets; | 89 std::vector<size_t> offsets; |
| 193 if (offset_for_adjustment) | 90 if (offset_for_adjustment) |
| 194 offsets.push_back(*offset_for_adjustment); | 91 offsets.push_back(*offset_for_adjustment); |
| 195 std::wstring result; | 92 string16 result; |
| 196 UTF16ToWideAndAdjustOffsets(utf16.data(), utf16.length(), &result, | 93 UTF8ToUTF16AndAdjustOffsets(utf8.data(), utf8.length(), &result, |
| 197 &offsets); | 94 &offsets); |
| 198 if (offset_for_adjustment) | 95 if (offset_for_adjustment) |
| 199 *offset_for_adjustment = offsets[0]; | 96 *offset_for_adjustment = offsets[0]; |
| 200 return result; | 97 return result; |
| 201 } | 98 } |
| 202 | 99 |
| 203 std::wstring UTF16ToWideAndAdjustOffsets( | 100 string16 UTF8ToUTF16AndAdjustOffsets( |
| 204 const string16& utf16, | 101 const base::StringPiece& utf8, |
| 205 std::vector<size_t>* offsets_for_adjustment) { | 102 std::vector<size_t>* offsets_for_adjustment) { |
| 206 std::wstring result; | 103 string16 result; |
| 207 UTF16ToWideAndAdjustOffsets(utf16.data(), utf16.length(), &result, | 104 UTF8ToUTF16AndAdjustOffsets(utf8.data(), utf8.length(), &result, |
| 208 offsets_for_adjustment); | 105 offsets_for_adjustment); |
| 209 return result; | 106 return result; |
| 210 } | 107 } |
| 211 | 108 |
| 212 #endif // defined(WCHAR_T_IS_UTF32) | |
| 213 | |
| 214 AdjustOffset::Adjustment::Adjustment(size_t location, | 109 AdjustOffset::Adjustment::Adjustment(size_t location, |
| 215 size_t old_length, | 110 size_t old_length, |
| 216 size_t new_length) | 111 size_t new_length) |
| 217 : location(location), | 112 : location(location), |
| 218 old_length(old_length), | 113 old_length(old_length), |
| 219 new_length(new_length) {} | 114 new_length(new_length) {} |
| 220 | 115 |
| 221 AdjustOffset::AdjustOffset(const Adjustments& adjustments) | 116 AdjustOffset::AdjustOffset(const Adjustments& adjustments) |
| 222 : adjustments_(adjustments) {} | 117 : adjustments_(adjustments) {} |
| 223 | 118 |
| 224 void AdjustOffset::operator()(size_t& offset) { | 119 void AdjustOffset::operator()(size_t& offset) { |
| 225 if (offset == std::wstring::npos) | 120 if (offset == string16::npos) |
| 226 return; | 121 return; |
| 227 size_t adjustment = 0; | 122 size_t adjustment = 0; |
| 228 for (Adjustments::const_iterator i = adjustments_.begin(); | 123 for (Adjustments::const_iterator i = adjustments_.begin(); |
| 229 i != adjustments_.end(); ++i) { | 124 i != adjustments_.end(); ++i) { |
| 230 size_t location = i->location; | 125 size_t location = i->location; |
| 231 if (offset == location && i->new_length == 0) { | 126 if (offset == location && i->new_length == 0) { |
| 232 offset = std::wstring::npos; | 127 offset = string16::npos; |
| 233 return; | 128 return; |
| 234 } | 129 } |
| 235 if (offset <= location) | 130 if (offset <= location) |
| 236 break; | 131 break; |
| 237 if (offset < (location + i->old_length)) { | 132 if (offset < (location + i->old_length)) { |
| 238 offset = std::wstring::npos; | 133 offset = string16::npos; |
| 239 return; | 134 return; |
| 240 } | 135 } |
| 241 adjustment += (i->old_length - i->new_length); | 136 adjustment += (i->old_length - i->new_length); |
| 242 } | 137 } |
| 243 offset -= adjustment; | 138 offset -= adjustment; |
| 244 } | 139 } |
| OLD | NEW |