base/i18n/icu_string_conversions.cc - Issue 372017: Fix various problems with inline autocomplete and URLs that change length dur...

Unified Diff: base/i18n/icu_string_conversions.cc

Issue 372017: Fix various problems with inline autocomplete and URLs that change length dur... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/

Patch Set: '' Created 11 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: base/i18n/icu_string_conversions.cc

===================================================================

--- base/i18n/icu_string_conversions.cc (revision 31214)

+++ base/i18n/icu_string_conversions.cc (working copy)

@@ -157,6 +157,90 @@

// Codepage <-> Wide/UTF-16 ---------------------------------------------------

+// Convert a UTF-16 string into the specified codepage_name. If the codepage

+// isn't found, return false.

+bool UTF16ToCodepage(const string16& utf16,

+ const char* codepage_name,

+ OnStringConversionError::Type on_error,

+ std::string* encoded) {

+ encoded->clear();

+ UErrorCode status = U_ZERO_ERROR;

+ UConverter* converter = ucnv_open(codepage_name, &status);

+ if (!U_SUCCESS(status))

+ return false;

+ return ConvertFromUTF16(converter, utf16.c_str(),

+ static_cast<int>(utf16.length()), on_error, encoded);

+}

+bool CodepageToUTF16AndAdjustOffset(const std::string& encoded,

+ const char* codepage_name,

+ OnStringConversionError::Type on_error,

+ string16* utf16,

+ size_t* offset_for_adjustment) {

+ utf16->clear();

+ UErrorCode status = U_ZERO_ERROR;

+ UConverter* converter = ucnv_open(codepage_name, &status);

+ if (!U_SUCCESS(status))

+ return false;

+ // Even in the worst case, the maximum length in 2-byte units of UTF-16

+ // output would be at most the same as the number of bytes in input. There

+ // is no single-byte encoding in which a character is mapped to a

+ // non-BMP character requiring two 2-byte units.

+ //

+ // Moreover, non-BMP characters in legacy multibyte encodings

+ // (e.g. EUC-JP, GB18030) take at least 2 bytes. The only exceptions are

+ // BOCU and SCSU, but we don't care about them.

+ size_t uchar_max_length = encoded.length() + 1;

+ SetUpErrorHandlerForToUChars(on_error, converter, &status);

+ char16* byte_buffer = WriteInto(utf16, uchar_max_length);

+ int byte_buffer_length = static_cast<int>(uchar_max_length);

+ const char* data = encoded.data();

+ int length = static_cast<int>(encoded.length());

+ int actual_size = 0;

+ if (offset_for_adjustment) {

+ if (*offset_for_adjustment >= encoded.length()) {

+ *offset_for_adjustment = string16::npos;

+ } else if (*offset_for_adjustment != 0) {

+ // Try to adjust the offset by converting the string in two pieces and

+ // using the length of the first piece as the adjusted offset.

+ actual_size += ucnv_toUChars(converter, byte_buffer, byte_buffer_length,

+ data, static_cast<int>(*offset_for_adjustment), &status);

+ if (U_SUCCESS(status)) {

+ // Conversion succeeded, so update the offset and then fall through to

+ // appending the second half of the string.

+ data += *offset_for_adjustment;

+ length -= *offset_for_adjustment;

+ *offset_for_adjustment = actual_size;

+ byte_buffer += actual_size;

+ byte_buffer_length -= actual_size;

+ } else {

+ // The offset may have been in the middle of an encoding sequence; mark

+ // it as having failed to adjust and then try to convert the entire

+ // string.

+ *offset_for_adjustment = string16::npos;

+ actual_size = 0;

+ ucnv_reset(converter);

+ status = U_ZERO_ERROR;

+ }

+ }

+ }

+ actual_size += ucnv_toUChars(converter, byte_buffer, byte_buffer_length, data,

+ length, &status);

+ ucnv_close(converter);

+ if (!U_SUCCESS(status)) {

+ utf16->clear(); // Make sure the output is empty on error.

+ return false;

+ }

+ utf16->resize(actual_size);

+ return true;

+}

// Convert a wstring into the specified codepage_name. If the codepage

// isn't found, return false.

bool WideToCodepage(const std::wstring& wide,

@@ -188,31 +272,16 @@

#endif // defined(WCHAR_T_IS_UTF32)

}

-// Convert a UTF-16 string into the specified codepage_name. If the codepage

-// isn't found, return false.

-bool UTF16ToCodepage(const string16& utf16,

- const char* codepage_name,

- OnStringConversionError::Type on_error,

- std::string* encoded) {

- encoded->clear();

- UErrorCode status = U_ZERO_ERROR;

- UConverter* converter = ucnv_open(codepage_name, &status);

- if (!U_SUCCESS(status))

- return false;

- return ConvertFromUTF16(converter, utf16.c_str(),

- static_cast<int>(utf16.length()), on_error, encoded);

-}

// Converts a string of the given codepage into wstring.

// If the codepage isn't found, return false.

-bool CodepageToWide(const std::string& encoded,

- const char* codepage_name,

- OnStringConversionError::Type on_error,

- std::wstring* wide) {

+bool CodepageToWideAndAdjustOffset(const std::string& encoded,

+ const char* codepage_name,

+ OnStringConversionError::Type on_error,

+ std::wstring* wide,

+ size_t* offset_for_adjustment) {

#if defined(WCHAR_T_IS_UTF16)

- return CodepageToUTF16(encoded, codepage_name, on_error, wide);

+ return CodepageToUTF16AndAdjustOffset(encoded, codepage_name, on_error, wide,

+ offset_for_adjustment);

#elif defined(WCHAR_T_IS_UTF32)

wide->clear();

@@ -227,70 +296,53 @@

// this can be 4 times larger than actually needed.

size_t wchar_max_length = encoded.length() + 1;

- // The byte buffer and its length to pass to ucnv_toAlgorithimic.

- char* byte_buffer = reinterpret_cast<char*>(

- WriteInto(wide, wchar_max_length));

- int byte_buffer_length = static_cast<int>(wchar_max_length) * 4;

SetUpErrorHandlerForToUChars(on_error, converter, &status);

- int actual_size = ucnv_toAlgorithmic(utf32_platform_endian(),

- converter,

- byte_buffer,

- byte_buffer_length,

- encoded.data(),

- static_cast<int>(encoded.length()),

- &status);

+ char* byte_buffer =

+ reinterpret_cast<char*>(WriteInto(wide, wchar_max_length));

+ int byte_buffer_length = static_cast<int>(wchar_max_length) * sizeof(wchar_t);

+ const char* data = encoded.data();

+ int length = static_cast<int>(encoded.length());

+ int actual_size = 0;

+ if (offset_for_adjustment) {

+ if (*offset_for_adjustment >= encoded.length()) {

+ *offset_for_adjustment = std::wstring::npos;

+ } else if (*offset_for_adjustment != 0) {

+ // Try to adjust the offset by converting the string in two pieces and

+ // using the length of the first piece as the adjusted offset.

+ actual_size += ucnv_toAlgorithmic(utf32_platform_endian(), converter,

+ byte_buffer, byte_buffer_length, data,

+ static_cast<int>(*offset_for_adjustment), &status);

+ if (U_SUCCESS(status)) {

+ // Conversion succeeded, so update the offset and then fall through to

+ // appending the second half of the string.

+ data += *offset_for_adjustment;

+ length -= *offset_for_adjustment;

+ *offset_for_adjustment = actual_size / sizeof(wchar_t);

+ byte_buffer += actual_size;

+ byte_buffer_length -= actual_size;

+ } else {

+ // The offset may have been in the middle of an encoding sequence; mark

+ // it as having failed to adjust and then try to convert the entire

+ // string.

+ *offset_for_adjustment = std::wstring::npos;

+ actual_size = 0;

+ ucnv_reset(converter);

+ status = U_ZERO_ERROR;

+ }

+ }

+ }

+ actual_size += ucnv_toAlgorithmic(utf32_platform_endian(), converter,

+ byte_buffer, byte_buffer_length, data, length, &status);

ucnv_close(converter);

if (!U_SUCCESS(status)) {

wide->clear(); // Make sure the output is empty on error.

return false;

}

// actual_size is # of bytes.

- wide->resize(actual_size / 4);

+ wide->resize(actual_size / sizeof(wchar_t));

return true;

#endif // defined(WCHAR_T_IS_UTF32)

}

-// Converts a string of the given codepage into UTF-16.

-// If the codepage isn't found, return false.

-bool CodepageToUTF16(const std::string& encoded,

- const char* codepage_name,

- OnStringConversionError::Type on_error,

- string16* utf16) {

- utf16->clear();

- UErrorCode status = U_ZERO_ERROR;

- UConverter* converter = ucnv_open(codepage_name, &status);

- if (!U_SUCCESS(status))

- return false;

- // Even in the worst case, the maximum length in 2-byte units of UTF-16

- // output would be at most the same as the number of bytes in input. There

- // is no single-byte encoding in which a character is mapped to a

- // non-BMP character requiring two 2-byte units.

- //

- // Moreover, non-BMP characters in legacy multibyte encodings

- // (e.g. EUC-JP, GB18030) take at least 2 bytes. The only exceptions are

- // BOCU and SCSU, but we don't care about them.

- size_t uchar_max_length = encoded.length() + 1;

- SetUpErrorHandlerForToUChars(on_error, converter, &status);

- int actual_size = ucnv_toUChars(converter,

- WriteInto(utf16, uchar_max_length),

- static_cast<int>(uchar_max_length),

- encoded.data(),

- static_cast<int>(encoded.length()),

- &status);

- ucnv_close(converter);

- if (!U_SUCCESS(status)) {

- utf16->clear(); // Make sure the output is empty on error.

- return false;

- }

- utf16->resize(actual_size);

- return true;

-}

} // namespace base

« no previous file with comments | « base/i18n/icu_string_conversions.h ('k') | base/i18n/icu_string_conversions_unittest.cc » ('j') | no next file with comments »