| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "base/i18n/icu_string_conversions.h" | 5 #include "base/i18n/icu_string_conversions.h" |
| 6 | 6 |
| 7 #include <vector> | 7 #include <vector> |
| 8 | 8 |
| 9 #include "base/basictypes.h" | 9 #include "base/basictypes.h" |
| 10 #include "base/logging.h" | 10 #include "base/logging.h" |
| (...skipping 170 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 181 // output would be at most the same as the number of bytes in input. There | 181 // output would be at most the same as the number of bytes in input. There |
| 182 // is no single-byte encoding in which a character is mapped to a | 182 // is no single-byte encoding in which a character is mapped to a |
| 183 // non-BMP character requiring two 2-byte units. | 183 // non-BMP character requiring two 2-byte units. |
| 184 // | 184 // |
| 185 // Moreover, non-BMP characters in legacy multibyte encodings | 185 // Moreover, non-BMP characters in legacy multibyte encodings |
| 186 // (e.g. EUC-JP, GB18030) take at least 2 bytes. The only exceptions are | 186 // (e.g. EUC-JP, GB18030) take at least 2 bytes. The only exceptions are |
| 187 // BOCU and SCSU, but we don't care about them. | 187 // BOCU and SCSU, but we don't care about them. |
| 188 size_t uchar_max_length = encoded.length() + 1; | 188 size_t uchar_max_length = encoded.length() + 1; |
| 189 | 189 |
| 190 SetUpErrorHandlerForToUChars(on_error, converter, &status); | 190 SetUpErrorHandlerForToUChars(on_error, converter, &status); |
| 191 scoped_array<char16> buffer(new char16[uchar_max_length]); | 191 scoped_ptr<char16[]> buffer(new char16[uchar_max_length]); |
| 192 int actual_size = ucnv_toUChars(converter, buffer.get(), | 192 int actual_size = ucnv_toUChars(converter, buffer.get(), |
| 193 static_cast<int>(uchar_max_length), encoded.data(), | 193 static_cast<int>(uchar_max_length), encoded.data(), |
| 194 static_cast<int>(encoded.length()), &status); | 194 static_cast<int>(encoded.length()), &status); |
| 195 ucnv_close(converter); | 195 ucnv_close(converter); |
| 196 if (!U_SUCCESS(status)) { | 196 if (!U_SUCCESS(status)) { |
| 197 utf16->clear(); // Make sure the output is empty on error. | 197 utf16->clear(); // Make sure the output is empty on error. |
| 198 return false; | 198 return false; |
| 199 } | 199 } |
| 200 | 200 |
| 201 utf16->assign(buffer.get(), actual_size); | 201 utf16->assign(buffer.get(), actual_size); |
| (...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 246 if (!U_SUCCESS(status)) | 246 if (!U_SUCCESS(status)) |
| 247 return false; | 247 return false; |
| 248 | 248 |
| 249 // The maximum length in 4 byte unit of UTF-32 output would be | 249 // The maximum length in 4 byte unit of UTF-32 output would be |
| 250 // at most the same as the number of bytes in input. In the worst | 250 // at most the same as the number of bytes in input. In the worst |
| 251 // case of GB18030 (excluding escaped-based encodings like ISO-2022-JP), | 251 // case of GB18030 (excluding escaped-based encodings like ISO-2022-JP), |
| 252 // this can be 4 times larger than actually needed. | 252 // this can be 4 times larger than actually needed. |
| 253 size_t wchar_max_length = encoded.length() + 1; | 253 size_t wchar_max_length = encoded.length() + 1; |
| 254 | 254 |
| 255 SetUpErrorHandlerForToUChars(on_error, converter, &status); | 255 SetUpErrorHandlerForToUChars(on_error, converter, &status); |
| 256 scoped_array<wchar_t> buffer(new wchar_t[wchar_max_length]); | 256 scoped_ptr<wchar_t[]> buffer(new wchar_t[wchar_max_length]); |
| 257 int actual_size = ucnv_toAlgorithmic(utf32_platform_endian(), converter, | 257 int actual_size = ucnv_toAlgorithmic(utf32_platform_endian(), converter, |
| 258 reinterpret_cast<char*>(buffer.get()), | 258 reinterpret_cast<char*>(buffer.get()), |
| 259 static_cast<int>(wchar_max_length) * sizeof(wchar_t), encoded.data(), | 259 static_cast<int>(wchar_max_length) * sizeof(wchar_t), encoded.data(), |
| 260 static_cast<int>(encoded.length()), &status); | 260 static_cast<int>(encoded.length()), &status); |
| 261 ucnv_close(converter); | 261 ucnv_close(converter); |
| 262 if (!U_SUCCESS(status)) { | 262 if (!U_SUCCESS(status)) { |
| 263 wide->clear(); // Make sure the output is empty on error. | 263 wide->clear(); // Make sure the output is empty on error. |
| 264 return false; | 264 return false; |
| 265 } | 265 } |
| 266 | 266 |
| 267 // actual_size is # of bytes. | 267 // actual_size is # of bytes. |
| 268 wide->assign(buffer.get(), actual_size / sizeof(wchar_t)); | 268 wide->assign(buffer.get(), actual_size / sizeof(wchar_t)); |
| 269 return true; | 269 return true; |
| 270 #endif // defined(WCHAR_T_IS_UTF32) | 270 #endif // defined(WCHAR_T_IS_UTF32) |
| 271 } | 271 } |
| 272 | 272 |
| 273 bool ConvertToUtf8AndNormalize(const std::string& text, | 273 bool ConvertToUtf8AndNormalize(const std::string& text, |
| 274 const std::string& charset, | 274 const std::string& charset, |
| 275 std::string* result) { | 275 std::string* result) { |
| 276 result->clear(); | 276 result->clear(); |
| 277 string16 utf16; | 277 string16 utf16; |
| 278 if (!CodepageToUTF16( | 278 if (!CodepageToUTF16( |
| 279 text, charset.c_str(), OnStringConversionError::FAIL, &utf16)) | 279 text, charset.c_str(), OnStringConversionError::FAIL, &utf16)) |
| 280 return false; | 280 return false; |
| 281 | 281 |
| 282 UErrorCode status = U_ZERO_ERROR; | 282 UErrorCode status = U_ZERO_ERROR; |
| 283 size_t max_length = utf16.length() + 1; | 283 size_t max_length = utf16.length() + 1; |
| 284 string16 normalized_utf16; | 284 string16 normalized_utf16; |
| 285 scoped_array<char16> buffer(new char16[max_length]); | 285 scoped_ptr<char16[]> buffer(new char16[max_length]); |
| 286 int actual_length = unorm_normalize( | 286 int actual_length = unorm_normalize( |
| 287 utf16.c_str(), utf16.length(), UNORM_NFC, 0, | 287 utf16.c_str(), utf16.length(), UNORM_NFC, 0, |
| 288 buffer.get(), static_cast<int>(max_length), &status); | 288 buffer.get(), static_cast<int>(max_length), &status); |
| 289 if (!U_SUCCESS(status)) | 289 if (!U_SUCCESS(status)) |
| 290 return false; | 290 return false; |
| 291 normalized_utf16.assign(buffer.get(), actual_length); | 291 normalized_utf16.assign(buffer.get(), actual_length); |
| 292 | 292 |
| 293 return UTF16ToUTF8(normalized_utf16.data(), | 293 return UTF16ToUTF8(normalized_utf16.data(), |
| 294 normalized_utf16.length(), result); | 294 normalized_utf16.length(), result); |
| 295 } | 295 } |
| 296 | 296 |
| 297 } // namespace base | 297 } // namespace base |
| OLD | NEW |