| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "base/i18n/icu_string_conversions.h" | 5 #include "base/i18n/icu_string_conversions.h" |
| 6 | 6 |
| 7 #include <stddef.h> | 7 #include <stddef.h> |
| 8 #include <stdint.h> | 8 #include <stdint.h> |
| 9 | 9 |
| 10 #include <memory> |
| 10 #include <vector> | 11 #include <vector> |
| 11 | 12 |
| 12 #include "base/logging.h" | 13 #include "base/logging.h" |
| 13 #include "base/memory/scoped_ptr.h" | |
| 14 #include "base/strings/string_util.h" | 14 #include "base/strings/string_util.h" |
| 15 #include "base/strings/utf_string_conversions.h" | 15 #include "base/strings/utf_string_conversions.h" |
| 16 #include "third_party/icu/source/common/unicode/ucnv.h" | 16 #include "third_party/icu/source/common/unicode/ucnv.h" |
| 17 #include "third_party/icu/source/common/unicode/ucnv_cb.h" | 17 #include "third_party/icu/source/common/unicode/ucnv_cb.h" |
| 18 #include "third_party/icu/source/common/unicode/ucnv_err.h" | 18 #include "third_party/icu/source/common/unicode/ucnv_err.h" |
| 19 #include "third_party/icu/source/common/unicode/unorm.h" | 19 #include "third_party/icu/source/common/unicode/unorm.h" |
| 20 #include "third_party/icu/source/common/unicode/ustring.h" | 20 #include "third_party/icu/source/common/unicode/ustring.h" |
| 21 | 21 |
| 22 namespace base { | 22 namespace base { |
| 23 | 23 |
| (...skipping 146 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 170 // output would be at most the same as the number of bytes in input. There | 170 // output would be at most the same as the number of bytes in input. There |
| 171 // is no single-byte encoding in which a character is mapped to a | 171 // is no single-byte encoding in which a character is mapped to a |
| 172 // non-BMP character requiring two 2-byte units. | 172 // non-BMP character requiring two 2-byte units. |
| 173 // | 173 // |
| 174 // Moreover, non-BMP characters in legacy multibyte encodings | 174 // Moreover, non-BMP characters in legacy multibyte encodings |
| 175 // (e.g. EUC-JP, GB18030) take at least 2 bytes. The only exceptions are | 175 // (e.g. EUC-JP, GB18030) take at least 2 bytes. The only exceptions are |
| 176 // BOCU and SCSU, but we don't care about them. | 176 // BOCU and SCSU, but we don't care about them. |
| 177 size_t uchar_max_length = encoded.length() + 1; | 177 size_t uchar_max_length = encoded.length() + 1; |
| 178 | 178 |
| 179 SetUpErrorHandlerForToUChars(on_error, converter, &status); | 179 SetUpErrorHandlerForToUChars(on_error, converter, &status); |
| 180 scoped_ptr<char16[]> buffer(new char16[uchar_max_length]); | 180 std::unique_ptr<char16[]> buffer(new char16[uchar_max_length]); |
| 181 int actual_size = ucnv_toUChars(converter, buffer.get(), | 181 int actual_size = ucnv_toUChars(converter, buffer.get(), |
| 182 static_cast<int>(uchar_max_length), encoded.data(), | 182 static_cast<int>(uchar_max_length), encoded.data(), |
| 183 static_cast<int>(encoded.length()), &status); | 183 static_cast<int>(encoded.length()), &status); |
| 184 ucnv_close(converter); | 184 ucnv_close(converter); |
| 185 if (!U_SUCCESS(status)) { | 185 if (!U_SUCCESS(status)) { |
| 186 utf16->clear(); // Make sure the output is empty on error. | 186 utf16->clear(); // Make sure the output is empty on error. |
| 187 return false; | 187 return false; |
| 188 } | 188 } |
| 189 | 189 |
| 190 utf16->assign(buffer.get(), actual_size); | 190 utf16->assign(buffer.get(), actual_size); |
| 191 return true; | 191 return true; |
| 192 } | 192 } |
| 193 | 193 |
| 194 bool ConvertToUtf8AndNormalize(const std::string& text, | 194 bool ConvertToUtf8AndNormalize(const std::string& text, |
| 195 const std::string& charset, | 195 const std::string& charset, |
| 196 std::string* result) { | 196 std::string* result) { |
| 197 result->clear(); | 197 result->clear(); |
| 198 string16 utf16; | 198 string16 utf16; |
| 199 if (!CodepageToUTF16( | 199 if (!CodepageToUTF16( |
| 200 text, charset.c_str(), OnStringConversionError::FAIL, &utf16)) | 200 text, charset.c_str(), OnStringConversionError::FAIL, &utf16)) |
| 201 return false; | 201 return false; |
| 202 | 202 |
| 203 UErrorCode status = U_ZERO_ERROR; | 203 UErrorCode status = U_ZERO_ERROR; |
| 204 size_t max_length = utf16.length() + 1; | 204 size_t max_length = utf16.length() + 1; |
| 205 string16 normalized_utf16; | 205 string16 normalized_utf16; |
| 206 scoped_ptr<char16[]> buffer(new char16[max_length]); | 206 std::unique_ptr<char16[]> buffer(new char16[max_length]); |
| 207 int actual_length = unorm_normalize( | 207 int actual_length = unorm_normalize( |
| 208 utf16.c_str(), utf16.length(), UNORM_NFC, 0, | 208 utf16.c_str(), utf16.length(), UNORM_NFC, 0, |
| 209 buffer.get(), static_cast<int>(max_length), &status); | 209 buffer.get(), static_cast<int>(max_length), &status); |
| 210 if (!U_SUCCESS(status)) | 210 if (!U_SUCCESS(status)) |
| 211 return false; | 211 return false; |
| 212 normalized_utf16.assign(buffer.get(), actual_length); | 212 normalized_utf16.assign(buffer.get(), actual_length); |
| 213 | 213 |
| 214 return UTF16ToUTF8(normalized_utf16.data(), | 214 return UTF16ToUTF8(normalized_utf16.data(), |
| 215 normalized_utf16.length(), result); | 215 normalized_utf16.length(), result); |
| 216 } | 216 } |
| 217 | 217 |
| 218 } // namespace base | 218 } // namespace base |
| OLD | NEW |