Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(2695)

Unified Diff: base/i18n/icu_string_conversions.cc

Issue 372017: Fix various problems with inline autocomplete and URLs that change length dur... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: '' Created 11 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « base/i18n/icu_string_conversions.h ('k') | base/i18n/icu_string_conversions_unittest.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: base/i18n/icu_string_conversions.cc
===================================================================
--- base/i18n/icu_string_conversions.cc (revision 31214)
+++ base/i18n/icu_string_conversions.cc (working copy)
@@ -157,6 +157,90 @@
// Codepage <-> Wide/UTF-16 ---------------------------------------------------
+// Converts |utf16| into the codepage named by codepage_name, writing the bytes
+// to |encoded|. Returns false if ucnv_open() fails (e.g. codepage not found).
+bool UTF16ToCodepage(const string16& utf16,
+ const char* codepage_name,
+ OnStringConversionError::Type on_error,
+ std::string* encoded) {
+ encoded->clear();  // Output starts empty, so the early return below leaves it empty.
+
+ UErrorCode status = U_ZERO_ERROR;
+ UConverter* converter = ucnv_open(codepage_name, &status);
+ if (!U_SUCCESS(status))
+ return false;  // No converter could be opened for codepage_name.
+
+ return ConvertFromUTF16(converter, utf16.c_str(),  // NOTE(review): converter is not closed here; presumably ConvertFromUTF16() closes it - confirm.
+ static_cast<int>(utf16.length()), on_error, encoded);
+}
+
+bool CodepageToUTF16AndAdjustOffset(const std::string& encoded,  // Input bytes in the encoding named by codepage_name.
+ const char* codepage_name,  // Converter name handed to ucnv_open().
+ OnStringConversionError::Type on_error,  // Forwarded to SetUpErrorHandlerForToUChars().
+ string16* utf16,  // Output; cleared on entry and again if conversion fails.
+ size_t* offset_for_adjustment) {  // Optional (may be NULL) in/out: byte offset into |encoded| on entry; char16 offset into |utf16|, or npos, on exit.
+ utf16->clear();
+
+ UErrorCode status = U_ZERO_ERROR;
+ UConverter* converter = ucnv_open(codepage_name, &status);
+ if (!U_SUCCESS(status))
+ return false;  // Returns before *offset_for_adjustment is touched.
+
+ // Even in the worst case, the maximum length in 2-byte units of UTF-16
+ // output would be at most the same as the number of bytes in input. There
+ // is no single-byte encoding in which a character is mapped to a
+ // non-BMP character requiring two 2-byte units.
+ //
+ // Moreover, non-BMP characters in legacy multibyte encodings
+ // (e.g. EUC-JP, GB18030) take at least 2 bytes. The only exceptions are
+ // BOCU and SCSU, but we don't care about them.
+ size_t uchar_max_length = encoded.length() + 1;  // Worst case from above plus one; presumably room for the terminator WriteInto() expects - confirm.
+
+ SetUpErrorHandlerForToUChars(on_error, converter, &status);
+ char16* byte_buffer = WriteInto(utf16, uchar_max_length);  // Despite the name, this points at char16 units, not bytes.
+ int byte_buffer_length = static_cast<int>(uchar_max_length);  // Remaining capacity, counted in char16 units.
+ const char* data = encoded.data();  // Start of the input still to be converted.
+ int length = static_cast<int>(encoded.length());  // Number of input bytes still to be converted.
+ int actual_size = 0;  // Running total of the values returned by ucnv_toUChars().
+ if (offset_for_adjustment) {
+ if (*offset_for_adjustment >= encoded.length()) {
+ *offset_for_adjustment = string16::npos;  // Offset lies at or past the end of the input: report it as not adjustable.
+ } else if (*offset_for_adjustment != 0) {  // An offset of 0 is left unchanged; no split conversion is needed.
+ // Try to adjust the offset by converting the string in two pieces and
+ // using the length of the first piece as the adjusted offset.
+ actual_size += ucnv_toUChars(converter, byte_buffer, byte_buffer_length,
+ data, static_cast<int>(*offset_for_adjustment), &status);
+ if (U_SUCCESS(status)) {
+ // Conversion succeeded, so update the offset and then fall through to
+ // appending the second half of the string.
+ data += *offset_for_adjustment;
+ length -= *offset_for_adjustment;
+ *offset_for_adjustment = actual_size;  // Must come after the two lines above, which still need the original byte offset.
+ byte_buffer += actual_size;
+ byte_buffer_length -= actual_size;
+ } else {
+ // The offset may have been in the middle of an encoding sequence; mark
+ // it as having failed to adjust and then try to convert the entire
+ // string.
+ *offset_for_adjustment = string16::npos;
+ actual_size = 0;  // Discard whatever the failed partial conversion reported.
+ ucnv_reset(converter);
+ status = U_ZERO_ERROR;  // Clear the failure so the full conversion below starts clean.
+ }
+ }
+ }
+ actual_size += ucnv_toUChars(converter, byte_buffer, byte_buffer_length, data,  // Converts the remainder, or the whole input if no split happened.
+ length, &status);
+ ucnv_close(converter);
+ if (!U_SUCCESS(status)) {
+ utf16->clear(); // Make sure the output is empty on error. NOTE(review): *offset_for_adjustment is not restored on this path.
+ return false;
+ }
+
+ utf16->resize(actual_size);  // Trim the over-allocated buffer down to what was actually written.
+ return true;
+}
+
// Convert a wstring into the specified codepage_name. If the codepage
// isn't found, return false.
bool WideToCodepage(const std::wstring& wide,
@@ -188,31 +272,16 @@
#endif // defined(WCHAR_T_IS_UTF32)
}
-// Convert a UTF-16 string into the specified codepage_name. If the codepage
-// isn't found, return false.
-bool UTF16ToCodepage(const string16& utf16,
- const char* codepage_name,
- OnStringConversionError::Type on_error,
- std::string* encoded) {
- encoded->clear();
-
- UErrorCode status = U_ZERO_ERROR;
- UConverter* converter = ucnv_open(codepage_name, &status);
- if (!U_SUCCESS(status))
- return false;
-
- return ConvertFromUTF16(converter, utf16.c_str(),
- static_cast<int>(utf16.length()), on_error, encoded);
-}
-
// Converts a string of the given codepage into wstring.
// If the codepage isn't found, return false.
-bool CodepageToWide(const std::string& encoded,
- const char* codepage_name,
- OnStringConversionError::Type on_error,
- std::wstring* wide) {
+bool CodepageToWideAndAdjustOffset(const std::string& encoded,
+ const char* codepage_name,
+ OnStringConversionError::Type on_error,
+ std::wstring* wide,
+ size_t* offset_for_adjustment) {
#if defined(WCHAR_T_IS_UTF16)
- return CodepageToUTF16(encoded, codepage_name, on_error, wide);
+ return CodepageToUTF16AndAdjustOffset(encoded, codepage_name, on_error, wide,
+ offset_for_adjustment);
#elif defined(WCHAR_T_IS_UTF32)
wide->clear();
@@ -227,70 +296,53 @@
// this can be 4 times larger than actually needed.
size_t wchar_max_length = encoded.length() + 1;
- // The byte buffer and its length to pass to ucnv_toAlgorithimic.
- char* byte_buffer = reinterpret_cast<char*>(
- WriteInto(wide, wchar_max_length));
- int byte_buffer_length = static_cast<int>(wchar_max_length) * 4;
-
SetUpErrorHandlerForToUChars(on_error, converter, &status);
- int actual_size = ucnv_toAlgorithmic(utf32_platform_endian(),
- converter,
- byte_buffer,
- byte_buffer_length,
- encoded.data(),
- static_cast<int>(encoded.length()),
- &status);
+ char* byte_buffer =
+ reinterpret_cast<char*>(WriteInto(wide, wchar_max_length));
+ int byte_buffer_length = static_cast<int>(wchar_max_length) * sizeof(wchar_t);
+ const char* data = encoded.data();
+ int length = static_cast<int>(encoded.length());
+ int actual_size = 0;
+ if (offset_for_adjustment) {
+ if (*offset_for_adjustment >= encoded.length()) {
+ *offset_for_adjustment = std::wstring::npos;
+ } else if (*offset_for_adjustment != 0) {
+ // Try to adjust the offset by converting the string in two pieces and
+ // using the length of the first piece as the adjusted offset.
+ actual_size += ucnv_toAlgorithmic(utf32_platform_endian(), converter,
+ byte_buffer, byte_buffer_length, data,
+ static_cast<int>(*offset_for_adjustment), &status);
+ if (U_SUCCESS(status)) {
+ // Conversion succeeded, so update the offset and then fall through to
+ // appending the second half of the string.
+ data += *offset_for_adjustment;
+ length -= *offset_for_adjustment;
+ *offset_for_adjustment = actual_size / sizeof(wchar_t);
+ byte_buffer += actual_size;
+ byte_buffer_length -= actual_size;
+ } else {
+ // The offset may have been in the middle of an encoding sequence; mark
+ // it as having failed to adjust and then try to convert the entire
+ // string.
+ *offset_for_adjustment = std::wstring::npos;
+ actual_size = 0;
+ ucnv_reset(converter);
+ status = U_ZERO_ERROR;
+ }
+ }
+ }
+ actual_size += ucnv_toAlgorithmic(utf32_platform_endian(), converter,
+ byte_buffer, byte_buffer_length, data, length, &status);
ucnv_close(converter);
-
if (!U_SUCCESS(status)) {
wide->clear(); // Make sure the output is empty on error.
return false;
}
// actual_size is # of bytes.
- wide->resize(actual_size / 4);
+ wide->resize(actual_size / sizeof(wchar_t));
return true;
#endif // defined(WCHAR_T_IS_UTF32)
}
-// Converts a string of the given codepage into UTF-16.
-// If the codepage isn't found, return false.
-bool CodepageToUTF16(const std::string& encoded,
- const char* codepage_name,
- OnStringConversionError::Type on_error,
- string16* utf16) {
- utf16->clear();
-
- UErrorCode status = U_ZERO_ERROR;
- UConverter* converter = ucnv_open(codepage_name, &status);
- if (!U_SUCCESS(status))
- return false;
-
- // Even in the worst case, the maximum length in 2-byte units of UTF-16
- // output would be at most the same as the number of bytes in input. There
- // is no single-byte encoding in which a character is mapped to a
- // non-BMP character requiring two 2-byte units.
- //
- // Moreover, non-BMP characters in legacy multibyte encodings
- // (e.g. EUC-JP, GB18030) take at least 2 bytes. The only exceptions are
- // BOCU and SCSU, but we don't care about them.
- size_t uchar_max_length = encoded.length() + 1;
-
- SetUpErrorHandlerForToUChars(on_error, converter, &status);
- int actual_size = ucnv_toUChars(converter,
- WriteInto(utf16, uchar_max_length),
- static_cast<int>(uchar_max_length),
- encoded.data(),
- static_cast<int>(encoded.length()),
- &status);
- ucnv_close(converter);
- if (!U_SUCCESS(status)) {
- utf16->clear(); // Make sure the output is empty on error.
- return false;
- }
-
- utf16->resize(actual_size);
- return true;
-}
-
} // namespace base
« no previous file with comments | « base/i18n/icu_string_conversions.h ('k') | base/i18n/icu_string_conversions_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698