Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(75)

Unified Diff: base/string_util_unittest.cc

Issue 147038: Pass through non-character codepoints in UTF-8,16,32 and Wide conversion func... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: '' Created 11 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « base/string_util_icu.cc ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
Index: base/string_util_unittest.cc
===================================================================
--- base/string_util_unittest.cc (revision 19007)
+++ base/string_util_unittest.cc (working copy)
@@ -309,8 +309,8 @@
} convert_cases[] = {
// Regular UTF-8 input.
{"\xe4\xbd\xa0\xe5\xa5\xbd", L"\x4f60\x597d", true},
- // Invalid Unicode code point.
- {"\xef\xbf\xbfHello", L"Hello", false},
+ // Non-character is passed through.
+ {"\xef\xbf\xbfHello", L"\xffffHello", true},
// Truncated UTF-8 sequence.
{"\xe4\xa0\xe5\xa5\xbd", L"\x597d", false},
// Truncated off the end.
@@ -319,11 +319,14 @@
{"\xf0\x84\xbd\xa0\xe5\xa5\xbd", L"\x597d", false},
// This UTF-8 character decodes to a UTF-16 surrogate, which is illegal.
{"\xed\xb0\x80", L"", false},
- // Non-BMP character. The result will either be in UTF-16 or UTF-32.
+ // Non-BMP characters. The second is a non-character regarded as valid.
+ // The result will either be in UTF-16 or UTF-32.
#if defined(WCHAR_T_IS_UTF16)
{"A\xF0\x90\x8C\x80z", L"A\xd800\xdf00z", true},
+ {"A\xF4\x8F\xBF\xBEz", L"A\xdbff\xdffez", true},
#elif defined(WCHAR_T_IS_UTF32)
{"A\xF0\x90\x8C\x80z", L"A\x10300z", true},
+ {"A\xF4\x8F\xBF\xBEz", L"A\x10fffez", true},
#endif
};
@@ -367,8 +370,9 @@
{L"\x4f60\x597d", "\xe4\xbd\xa0\xe5\xa5\xbd", true},
// Test a non-BMP character.
{L"\xd800\xdf00", "\xF0\x90\x8C\x80", true},
- // Invalid Unicode code point.
- {L"\xffffHello", "Hello", false},
+ // Non-characters are passed through.
+ {L"\xffffHello", "\xEF\xBF\xBFHello", true},
+ {L"\xdbff\xdffeHello", "\xF4\x8F\xBF\xBEHello", true},
// The first character is a truncated UTF-16 character.
{L"\xd800\x597d", "\xe5\xa5\xbd", false},
// Truncated at the end.
@@ -389,7 +393,7 @@
#elif defined(WCHAR_T_IS_UTF32)
// This test is only valid when wchar_t == UTF-32.
TEST(StringUtilTest, ConvertUTF32ToUTF8) {
- struct UTF8ToWideCase {
+ struct WideToUTF8Case {
const wchar_t* utf32;
const char* utf8;
bool success;
@@ -398,11 +402,14 @@
{L"\x4f60\x597d", "\xe4\xbd\xa0\xe5\xa5\xbd", true},
// Test a non-BMP character.
{L"A\x10300z", "A\xF0\x90\x8C\x80z", true},
+ // Non-characters are passed through.
+ {L"\xffffHello", "\xEF\xBF\xBFHello", true},
+ {L"\x10fffeHello", "\xF4\x8F\xBF\xBEHello", true},
// Invalid Unicode code points.
- {L"\xffffHello", "Hello", false},
{L"\xfffffffHello", "Hello", false},
// The first character is a lone UTF-16 surrogate, which is not a valid UTF-32 code point.
{L"\xd800\x597d", "\xe5\xa5\xbd", false},
+ {L"\xdc01Hello", "Hello", false},
};
for (size_t i = 0; i < ARRAYSIZE_UNSAFE(convert_cases); i++) {
« no previous file with comments | « base/string_util_icu.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698