| OLD | NEW |
| 1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "net/base/net_string_util.h" | 5 #include "net/base/net_string_util.h" |
| 6 | 6 |
| 7 #include "base/i18n/i18n_constants.h" | 7 #include "base/i18n/i18n_constants.h" |
| 8 #include "base/i18n/icu_string_conversions.h" | 8 #include "base/i18n/icu_string_conversions.h" |
| 9 #include "base/strings/string_util.h" | 9 #include "base/strings/string_util.h" |
| 10 #include "third_party/icu/source/common/unicode/ucnv.h" | 10 #include "third_party/icu/source/common/unicode/ucnv.h" |
| 11 | 11 |
| 12 namespace net { | 12 namespace net { |
| 13 | 13 |
| 14 const char* const kCharsetLatin1 = base::kCodepageLatin1; | 14 const char* const kCharsetLatin1 = base::kCodepageLatin1; |
| 15 | 15 |
| 16 bool ConvertToUtf8(const std::string& text, const char* charset, | 16 bool ConvertToUtf8(const std::string& text, |
| 17 const char* charset, |
| 17 std::string* output) { | 18 std::string* output) { |
| 18 output->clear(); | 19 output->clear(); |
| 19 | 20 |
| 20 UErrorCode err = U_ZERO_ERROR; | 21 UErrorCode err = U_ZERO_ERROR; |
| 21 UConverter* converter(ucnv_open(charset, &err)); | 22 UConverter* converter(ucnv_open(charset, &err)); |
| 22 if (U_FAILURE(err)) | 23 if (U_FAILURE(err)) |
| 23 return false; | 24 return false; |
| 24 | 25 |
| 25 // A single byte in a legacy encoding can be expanded to 3 bytes in UTF-8. | 26 // A single byte in a legacy encoding can be expanded to 3 bytes in UTF-8. |
| 26 // A 'two-byte character' in a legacy encoding can be expanded to 4 bytes | 27 // A 'two-byte character' in a legacy encoding can be expanded to 4 bytes |
| 27 // in UTF-8. Therefore, the expansion ratio is 3 at most. Add one for a | 28 // in UTF-8. Therefore, the expansion ratio is 3 at most. Add one for a |
| 28 // trailing '\0'. | 29 // trailing '\0'. |
| 29 size_t output_length = text.length() * 3 + 1; | 30 size_t output_length = text.length() * 3 + 1; |
| 30 char* buf = WriteInto(output, output_length); | 31 char* buf = WriteInto(output, output_length); |
| 31 output_length = ucnv_toAlgorithmic(UCNV_UTF8, converter, buf, output_length, | 32 output_length = ucnv_toAlgorithmic(UCNV_UTF8, |
| 32 text.data(), text.length(), &err); | 33 converter, |
| 34 buf, |
| 35 output_length, |
| 36 text.data(), |
| 37 text.length(), |
| 38 &err); |
| 33 ucnv_close(converter); | 39 ucnv_close(converter); |
| 34 if (U_FAILURE(err)) { | 40 if (U_FAILURE(err)) { |
| 35 output->clear(); | 41 output->clear(); |
| 36 return false; | 42 return false; |
| 37 } | 43 } |
| 38 | 44 |
| 39 output->resize(output_length); | 45 output->resize(output_length); |
| 40 return true; | 46 return true; |
| 41 } | 47 } |
| 42 | 48 |
| 43 bool ConvertToUtf8AndNormalize(const std::string& text, const char* charset, | 49 bool ConvertToUtf8AndNormalize(const std::string& text, |
| 50 const char* charset, |
| 44 std::string* output) { | 51 std::string* output) { |
| 45 return base::ConvertToUtf8AndNormalize(text, charset, output); | 52 return base::ConvertToUtf8AndNormalize(text, charset, output); |
| 46 } | 53 } |
| 47 | 54 |
| 48 bool ConvertToUTF16(const std::string& text, const char* charset, | 55 bool ConvertToUTF16(const std::string& text, |
| 56 const char* charset, |
| 49 base::string16* output) { | 57 base::string16* output) { |
| 50 return base::CodepageToUTF16(text, charset, | 58 return base::CodepageToUTF16( |
| 51 base::OnStringConversionError::FAIL, output); | 59 text, charset, base::OnStringConversionError::FAIL, output); |
| 52 } | 60 } |
| 53 | 61 |
| 54 bool ConvertToUTF16WithSubstitutions(const std::string& text, | 62 bool ConvertToUTF16WithSubstitutions(const std::string& text, |
| 55 const char* charset, | 63 const char* charset, |
| 56 base::string16* output) { | 64 base::string16* output) { |
| 57 return base::CodepageToUTF16(text, charset, | 65 return base::CodepageToUTF16( |
| 58 base::OnStringConversionError::SUBSTITUTE, | 66 text, charset, base::OnStringConversionError::SUBSTITUTE, output); |
| 59 output); | |
| 60 } | 67 } |
| 61 | 68 |
| 62 } // namespace net | 69 } // namespace net |
| OLD | NEW |