| Index: base/i18n/icu_string_conversions.cc
|
| ===================================================================
|
| --- base/i18n/icu_string_conversions.cc (revision 31214)
|
| +++ base/i18n/icu_string_conversions.cc (working copy)
|
| @@ -157,6 +157,90 @@
|
|
|
| // Codepage <-> Wide/UTF-16 ---------------------------------------------------
|
|
|
| +// Clears |encoded|, then converts |utf16| to codepage |codepage_name|. Returns
|
| +// false if the converter cannot be opened; else the ConvertFromUTF16() result.
|
| +bool UTF16ToCodepage(const string16& utf16,
|
| + const char* codepage_name,
|
| + OnStringConversionError::Type on_error,
|
| + std::string* encoded) {
|
| + encoded->clear();
|
| +
|
| + UErrorCode status = U_ZERO_ERROR;
|
| + UConverter* converter = ucnv_open(codepage_name, &status);
|
| + if (!U_SUCCESS(status))
|
| + return false;
|
| +
|
| + return ConvertFromUTF16(converter, utf16.c_str(),
|
| + static_cast<int>(utf16.length()), on_error, encoded);
|
| +}
|
| +
|
| +bool CodepageToUTF16AndAdjustOffset(const std::string& encoded,
|
| + const char* codepage_name,
|
| + OnStringConversionError::Type on_error,
|
| + string16* utf16,
|
| + size_t* offset_for_adjustment) {  // Optional; may be NULL.
|
| + utf16->clear();  // Start from an empty output string.
|
| +
|
| + UErrorCode status = U_ZERO_ERROR;
|
| + UConverter* converter = ucnv_open(codepage_name, &status);
|
| + if (!U_SUCCESS(status))
|
| + return false;  // No converter could be opened for |codepage_name|.
|
| +
|
| + // Even in the worst case, the maximum length in 2-byte units of UTF-16
|
| + // output would be at most the same as the number of bytes in input. There
|
| + // is no single-byte encoding in which a character is mapped to a
|
| + // non-BMP character requiring two 2-byte units.
|
| + //
|
| + // Moreover, non-BMP characters in legacy multibyte encodings
|
| + // (e.g. EUC-JP, GB18030) take at least 2 bytes. The only exceptions are
|
| + // BOCU and SCSU, but we don't care about them.
|
| + size_t uchar_max_length = encoded.length() + 1;
|
| +
|
| + SetUpErrorHandlerForToUChars(on_error, converter, &status);
|
| + char16* byte_buffer = WriteInto(utf16, uchar_max_length);  // char16 units.
|
| + int byte_buffer_length = static_cast<int>(uchar_max_length);  // Not bytes.
|
| + const char* data = encoded.data();
|
| + int length = static_cast<int>(encoded.length());
|
| + int actual_size = 0;  // Running total of char16 units converted so far.
|
| + if (offset_for_adjustment) {
|
| + if (*offset_for_adjustment >= encoded.length()) {
|
| + *offset_for_adjustment = string16::npos;  // At or past end of input.
|
| + } else if (*offset_for_adjustment != 0) {  // An offset of 0 stays 0.
|
| + // Try to adjust the offset by converting the string in two pieces and
|
| + // using the length of the first piece as the adjusted offset.
|
| + actual_size += ucnv_toUChars(converter, byte_buffer, byte_buffer_length,
|
| + data, static_cast<int>(*offset_for_adjustment), &status);
|
| + if (U_SUCCESS(status)) {
|
| + // Conversion succeeded, so update the offset and then fall through to
|
| + // appending the second half of the string.
|
| + data += *offset_for_adjustment;
|
| + length -= *offset_for_adjustment;
|
| + *offset_for_adjustment = actual_size;  // Now in char16 units.
|
| + byte_buffer += actual_size;
|
| + byte_buffer_length -= actual_size;
|
| + } else {
|
| + // The offset may have been in the middle of an encoding sequence; mark
|
| + // it as having failed to adjust and then try to convert the entire
|
| + // string.
|
| + *offset_for_adjustment = string16::npos;
|
| + actual_size = 0;
|
| + ucnv_reset(converter);
|
| + status = U_ZERO_ERROR;
|
| + }
|
| + }
|
| + }
|
| + actual_size += ucnv_toUChars(converter, byte_buffer, byte_buffer_length, data,
|
| + length, &status);
|
| + ucnv_close(converter);
|
| + if (!U_SUCCESS(status)) {
|
| + utf16->clear(); // Make sure the output is empty on error.
|
| + return false;
|
| + }
|
| +
|
| + utf16->resize(actual_size);  // Shrink to the total converted length.
|
| + return true;
|
| +}
|
| +
|
| // Convert a wstring into the specified codepage_name. If the codepage
|
| // isn't found, return false.
|
| bool WideToCodepage(const std::wstring& wide,
|
| @@ -188,31 +272,16 @@
|
| #endif // defined(WCHAR_T_IS_UTF32)
|
| }
|
|
|
| -// Convert a UTF-16 string into the specified codepage_name. If the codepage
|
| -// isn't found, return false.
|
| -bool UTF16ToCodepage(const string16& utf16,
|
| - const char* codepage_name,
|
| - OnStringConversionError::Type on_error,
|
| - std::string* encoded) {
|
| - encoded->clear();
|
| -
|
| - UErrorCode status = U_ZERO_ERROR;
|
| - UConverter* converter = ucnv_open(codepage_name, &status);
|
| - if (!U_SUCCESS(status))
|
| - return false;
|
| -
|
| - return ConvertFromUTF16(converter, utf16.c_str(),
|
| - static_cast<int>(utf16.length()), on_error, encoded);
|
| -}
|
| -
|
| // Converts a string of the given codepage into wstring.
|
| // If the codepage isn't found, return false.
|
| -bool CodepageToWide(const std::string& encoded,
|
| - const char* codepage_name,
|
| - OnStringConversionError::Type on_error,
|
| - std::wstring* wide) {
|
| +bool CodepageToWideAndAdjustOffset(const std::string& encoded,
|
| + const char* codepage_name,
|
| + OnStringConversionError::Type on_error,
|
| + std::wstring* wide,
|
| + size_t* offset_for_adjustment) {
|
| #if defined(WCHAR_T_IS_UTF16)
|
| - return CodepageToUTF16(encoded, codepage_name, on_error, wide);
|
| + return CodepageToUTF16AndAdjustOffset(encoded, codepage_name, on_error, wide,
|
| + offset_for_adjustment);
|
| #elif defined(WCHAR_T_IS_UTF32)
|
| wide->clear();
|
|
|
| @@ -227,70 +296,53 @@
|
| // this can be 4 times larger than actually needed.
|
| size_t wchar_max_length = encoded.length() + 1;
|
|
|
| - // The byte buffer and its length to pass to ucnv_toAlgorithimic.
|
| - char* byte_buffer = reinterpret_cast<char*>(
|
| - WriteInto(wide, wchar_max_length));
|
| - int byte_buffer_length = static_cast<int>(wchar_max_length) * 4;
|
| -
|
| SetUpErrorHandlerForToUChars(on_error, converter, &status);
|
| - int actual_size = ucnv_toAlgorithmic(utf32_platform_endian(),
|
| - converter,
|
| - byte_buffer,
|
| - byte_buffer_length,
|
| - encoded.data(),
|
| - static_cast<int>(encoded.length()),
|
| - &status);
|
| + char* byte_buffer =
|
| + reinterpret_cast<char*>(WriteInto(wide, wchar_max_length));
|
| + int byte_buffer_length = static_cast<int>(wchar_max_length) * sizeof(wchar_t);
|
| + const char* data = encoded.data();
|
| + int length = static_cast<int>(encoded.length());
|
| + int actual_size = 0;
|
| + if (offset_for_adjustment) {
|
| + if (*offset_for_adjustment >= encoded.length()) {
|
| + *offset_for_adjustment = std::wstring::npos;
|
| + } else if (*offset_for_adjustment != 0) {
|
| + // Try to adjust the offset by converting the string in two pieces and
|
| + // using the length of the first piece as the adjusted offset.
|
| + actual_size += ucnv_toAlgorithmic(utf32_platform_endian(), converter,
|
| + byte_buffer, byte_buffer_length, data,
|
| + static_cast<int>(*offset_for_adjustment), &status);
|
| + if (U_SUCCESS(status)) {
|
| + // Conversion succeeded, so update the offset and then fall through to
|
| + // appending the second half of the string.
|
| + data += *offset_for_adjustment;
|
| + length -= *offset_for_adjustment;
|
| + *offset_for_adjustment = actual_size / sizeof(wchar_t);
|
| + byte_buffer += actual_size;
|
| + byte_buffer_length -= actual_size;
|
| + } else {
|
| + // The offset may have been in the middle of an encoding sequence; mark
|
| + // it as having failed to adjust and then try to convert the entire
|
| + // string.
|
| + *offset_for_adjustment = std::wstring::npos;
|
| + actual_size = 0;
|
| + ucnv_reset(converter);
|
| + status = U_ZERO_ERROR;
|
| + }
|
| + }
|
| + }
|
| + actual_size += ucnv_toAlgorithmic(utf32_platform_endian(), converter,
|
| + byte_buffer, byte_buffer_length, data, length, &status);
|
| ucnv_close(converter);
|
| -
|
| if (!U_SUCCESS(status)) {
|
| wide->clear(); // Make sure the output is empty on error.
|
| return false;
|
| }
|
|
|
| // actual_size is # of bytes.
|
| - wide->resize(actual_size / 4);
|
| + wide->resize(actual_size / sizeof(wchar_t));
|
| return true;
|
| #endif // defined(WCHAR_T_IS_UTF32)
|
| }
|
|
|
| -// Converts a string of the given codepage into UTF-16.
|
| -// If the codepage isn't found, return false.
|
| -bool CodepageToUTF16(const std::string& encoded,
|
| - const char* codepage_name,
|
| - OnStringConversionError::Type on_error,
|
| - string16* utf16) {
|
| - utf16->clear();
|
| -
|
| - UErrorCode status = U_ZERO_ERROR;
|
| - UConverter* converter = ucnv_open(codepage_name, &status);
|
| - if (!U_SUCCESS(status))
|
| - return false;
|
| -
|
| - // Even in the worst case, the maximum length in 2-byte units of UTF-16
|
| - // output would be at most the same as the number of bytes in input. There
|
| - // is no single-byte encoding in which a character is mapped to a
|
| - // non-BMP character requiring two 2-byte units.
|
| - //
|
| - // Moreover, non-BMP characters in legacy multibyte encodings
|
| - // (e.g. EUC-JP, GB18030) take at least 2 bytes. The only exceptions are
|
| - // BOCU and SCSU, but we don't care about them.
|
| - size_t uchar_max_length = encoded.length() + 1;
|
| -
|
| - SetUpErrorHandlerForToUChars(on_error, converter, &status);
|
| - int actual_size = ucnv_toUChars(converter,
|
| - WriteInto(utf16, uchar_max_length),
|
| - static_cast<int>(uchar_max_length),
|
| - encoded.data(),
|
| - static_cast<int>(encoded.length()),
|
| - &status);
|
| - ucnv_close(converter);
|
| - if (!U_SUCCESS(status)) {
|
| - utf16->clear(); // Make sure the output is empty on error.
|
| - return false;
|
| - }
|
| -
|
| - utf16->resize(actual_size);
|
| - return true;
|
| -}
|
| -
|
| } // namespace base
|
|
|