OLD | NEW |
1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "base/i18n/icu_string_conversions.h" | 5 #include "base/i18n/icu_string_conversions.h" |
6 | 6 |
7 #include <vector> | 7 #include <vector> |
8 | 8 |
9 #include "base/basictypes.h" | 9 #include "base/basictypes.h" |
10 #include "base/logging.h" | 10 #include "base/logging.h" |
11 #include "base/string_util.h" | 11 #include "base/string_util.h" |
| 12 #include "base/utf_string_conversions.h" |
12 #include "unicode/ucnv.h" | 13 #include "unicode/ucnv.h" |
13 #include "unicode/ucnv_cb.h" | 14 #include "unicode/ucnv_cb.h" |
14 #include "unicode/ucnv_err.h" | 15 #include "unicode/ucnv_err.h" |
| 16 #include "unicode/unorm.h" |
15 #include "unicode/ustring.h" | 17 #include "unicode/ustring.h" |
16 | 18 |
17 namespace base { | 19 namespace base { |
18 | 20 |
19 namespace { | 21 namespace { |
20 // ToUnicodeCallbackSubstitute() is based on UCNV_TO_U_CALLBACK_SUBSTITUTE | 22 // ToUnicodeCallbackSubstitute() is based on UCNV_TO_U_CALLBACK_SUBSTITUTE |
21 // in source/common/ucnv_err.c. | 23 // in source/common/ucnv_err.c. |
22 | 24 |
23 // Copyright (c) 1995-2006 International Business Machines Corporation | 25 // Copyright (c) 1995-2006 International Business Machines Corporation |
24 // and others | 26 // and others |
(...skipping 232 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
257 wide->clear(); // Make sure the output is empty on error. | 259 wide->clear(); // Make sure the output is empty on error. |
258 return false; | 260 return false; |
259 } | 261 } |
260 | 262 |
261 // actual_size is # of bytes. | 263 // actual_size is # of bytes. |
262 wide->resize(actual_size / sizeof(wchar_t)); | 264 wide->resize(actual_size / sizeof(wchar_t)); |
263 return true; | 265 return true; |
264 #endif // defined(WCHAR_T_IS_UTF32) | 266 #endif // defined(WCHAR_T_IS_UTF32) |
265 } | 267 } |
266 | 268 |
| 269 bool ConvertToUtf8AndNormalize(const std::string& text, |
| 270 const std::string& charset, |
| 271 std::string* result) { |
| 272 result->clear(); |
| 273 string16 utf16; |
| 274 if (!CodepageToUTF16( |
| 275 text, charset.c_str(), OnStringConversionError::FAIL, &utf16)) |
| 276 return false; |
| 277 |
| 278 UErrorCode status = U_ZERO_ERROR; |
| 279 size_t max_length = utf16.length() + 1; |
| 280 string16 normalized_utf16; |
| 281 int actual_length = unorm_normalize( |
| 282 utf16.c_str(), utf16.length(), UNORM_NFC, 0, |
| 283 WriteInto(&normalized_utf16, max_length), |
| 284 static_cast<int>(max_length), &status); |
| 285 if (!U_SUCCESS(status)) |
| 286 return false; |
| 287 normalized_utf16.resize(actual_length); |
| 288 |
| 289 return UTF16ToUTF8(normalized_utf16.data(), |
| 290 normalized_utf16.length(), result); |
| 291 } |
| 292 |
267 } // namespace base | 293 } // namespace base |
OLD | NEW |