OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "base/i18n/icu_string_conversions.h" | 5 #include "base/i18n/icu_string_conversions.h" |
6 | 6 |
7 #include <stddef.h> | 7 #include <stddef.h> |
8 #include <stdint.h> | 8 #include <stdint.h> |
9 | 9 |
| 10 #include <memory> |
10 #include <vector> | 11 #include <vector> |
11 | 12 |
12 #include "base/logging.h" | 13 #include "base/logging.h" |
13 #include "base/memory/scoped_ptr.h" | |
14 #include "base/strings/string_util.h" | 14 #include "base/strings/string_util.h" |
15 #include "base/strings/utf_string_conversions.h" | 15 #include "base/strings/utf_string_conversions.h" |
16 #include "third_party/icu/source/common/unicode/ucnv.h" | 16 #include "third_party/icu/source/common/unicode/ucnv.h" |
17 #include "third_party/icu/source/common/unicode/ucnv_cb.h" | 17 #include "third_party/icu/source/common/unicode/ucnv_cb.h" |
18 #include "third_party/icu/source/common/unicode/ucnv_err.h" | 18 #include "third_party/icu/source/common/unicode/ucnv_err.h" |
19 #include "third_party/icu/source/common/unicode/unorm.h" | 19 #include "third_party/icu/source/common/unicode/unorm.h" |
20 #include "third_party/icu/source/common/unicode/ustring.h" | 20 #include "third_party/icu/source/common/unicode/ustring.h" |
21 | 21 |
22 namespace base { | 22 namespace base { |
23 | 23 |
(...skipping 146 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
170 // output would be at most the same as the number of bytes in input. There | 170 // output would be at most the same as the number of bytes in input. There |
171 // is no single-byte encoding in which a character is mapped to a | 171 // is no single-byte encoding in which a character is mapped to a |
172 // non-BMP character requiring two 2-byte units. | 172 // non-BMP character requiring two 2-byte units. |
173 // | 173 // |
174 // Moreover, non-BMP characters in legacy multibyte encodings | 174 // Moreover, non-BMP characters in legacy multibyte encodings |
175 // (e.g. EUC-JP, GB18030) take at least 2 bytes. The only exceptions are | 175 // (e.g. EUC-JP, GB18030) take at least 2 bytes. The only exceptions are |
176 // BOCU and SCSU, but we don't care about them. | 176 // BOCU and SCSU, but we don't care about them. |
177 size_t uchar_max_length = encoded.length() + 1; | 177 size_t uchar_max_length = encoded.length() + 1; |
178 | 178 |
179 SetUpErrorHandlerForToUChars(on_error, converter, &status); | 179 SetUpErrorHandlerForToUChars(on_error, converter, &status); |
180 scoped_ptr<char16[]> buffer(new char16[uchar_max_length]); | 180 std::unique_ptr<char16[]> buffer(new char16[uchar_max_length]); |
181 int actual_size = ucnv_toUChars(converter, buffer.get(), | 181 int actual_size = ucnv_toUChars(converter, buffer.get(), |
182 static_cast<int>(uchar_max_length), encoded.data(), | 182 static_cast<int>(uchar_max_length), encoded.data(), |
183 static_cast<int>(encoded.length()), &status); | 183 static_cast<int>(encoded.length()), &status); |
184 ucnv_close(converter); | 184 ucnv_close(converter); |
185 if (!U_SUCCESS(status)) { | 185 if (!U_SUCCESS(status)) { |
186 utf16->clear(); // Make sure the output is empty on error. | 186 utf16->clear(); // Make sure the output is empty on error. |
187 return false; | 187 return false; |
188 } | 188 } |
189 | 189 |
190 utf16->assign(buffer.get(), actual_size); | 190 utf16->assign(buffer.get(), actual_size); |
191 return true; | 191 return true; |
192 } | 192 } |
193 | 193 |
194 bool ConvertToUtf8AndNormalize(const std::string& text, | 194 bool ConvertToUtf8AndNormalize(const std::string& text, |
195 const std::string& charset, | 195 const std::string& charset, |
196 std::string* result) { | 196 std::string* result) { |
197 result->clear(); | 197 result->clear(); |
198 string16 utf16; | 198 string16 utf16; |
199 if (!CodepageToUTF16( | 199 if (!CodepageToUTF16( |
200 text, charset.c_str(), OnStringConversionError::FAIL, &utf16)) | 200 text, charset.c_str(), OnStringConversionError::FAIL, &utf16)) |
201 return false; | 201 return false; |
202 | 202 |
203 UErrorCode status = U_ZERO_ERROR; | 203 UErrorCode status = U_ZERO_ERROR; |
204 size_t max_length = utf16.length() + 1; | 204 size_t max_length = utf16.length() + 1; |
205 string16 normalized_utf16; | 205 string16 normalized_utf16; |
206 scoped_ptr<char16[]> buffer(new char16[max_length]); | 206 std::unique_ptr<char16[]> buffer(new char16[max_length]); |
207 int actual_length = unorm_normalize( | 207 int actual_length = unorm_normalize( |
208 utf16.c_str(), utf16.length(), UNORM_NFC, 0, | 208 utf16.c_str(), utf16.length(), UNORM_NFC, 0, |
209 buffer.get(), static_cast<int>(max_length), &status); | 209 buffer.get(), static_cast<int>(max_length), &status); |
210 if (!U_SUCCESS(status)) | 210 if (!U_SUCCESS(status)) |
211 return false; | 211 return false; |
212 normalized_utf16.assign(buffer.get(), actual_length); | 212 normalized_utf16.assign(buffer.get(), actual_length); |
213 | 213 |
214 return UTF16ToUTF8(normalized_utf16.data(), | 214 return UTF16ToUTF8(normalized_utf16.data(), |
215 normalized_utf16.length(), result); | 215 normalized_utf16.length(), result); |
216 } | 216 } |
217 | 217 |
218 } // namespace base | 218 } // namespace base |
OLD | NEW |