base/string_util_unittest.cc - Issue 147038: Pass through non-character codepoints in UTF-8,16,32 and Wide conversion functions

Keyboard Shortcuts

	File
u :	up to issue
j / k :	jump to file after / before current file
J / K :	jump to next file with a comment after / before current file
	Side-by-side diff
i :	toggle intra-line diffs
e :	expand all comments
c :	collapse all comments
s :	toggle showing all comments
n / p :	next / previous diff chunk or comment
N / P :	next / previous comment
<Up> / <Down> :	next / previous line

	Issue
u :	up to list of issues
j / k :	jump to patch after / before current patch
o / <Enter> :	open current patch in side-by-side view
i :	open current patch in unified diff view

	Issue List
j / k :	jump to issue after / before current issue
o / <Enter> :	open current issue

Unified Diff: base/string_util_unittest.cc

Issue 147038: Pass through non-character codepoints in UTF-8,16,32 and Wide conversion functions (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/

Patch Set: '' Created 11 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: base/string_util_unittest.cc

===================================================================

--- base/string_util_unittest.cc (revision 19007)

+++ base/string_util_unittest.cc (working copy)

@@ -309,8 +309,8 @@

} convert_cases[] = {

// Regular UTF-8 input.

{"\xe4\xbd\xa0\xe5\xa5\xbd", L"\x4f60\x597d", true},

- // Invalid Unicode code point.

- {"\xef\xbf\xbfHello", L"Hello", false},

+ // Non-character is passed through.

+ {"\xef\xbf\xbfHello", L"\xffffHello", true},

// Truncated UTF-8 sequence.

{"\xe4\xa0\xe5\xa5\xbd", L"\x597d", false},

// Truncated off the end.

@@ -319,11 +319,14 @@

{"\xf0\x84\xbd\xa0\xe5\xa5\xbd", L"\x597d", false},

// This UTF-8 character decodes to a UTF-16 surrogate, which is illegal.

{"\xed\xb0\x80", L"", false},

- // Non-BMP character. The result will either be in UTF-16 or UTF-32.

+ // Non-BMP characters. The second is a non-character regarded as valid.

+ // The result will either be in UTF-16 or UTF-32.

#if defined(WCHAR_T_IS_UTF16)

{"A\xF0\x90\x8C\x80z", L"A\xd800\xdf00z", true},

+ {"A\xF4\x8F\xBF\xBEz", L"A\xdbff\xdffez", true},

#elif defined(WCHAR_T_IS_UTF32)

{"A\xF0\x90\x8C\x80z", L"A\x10300z", true},

+ {"A\xF4\x8F\xBF\xBEz", L"A\x10fffez", true},

#endif

};

@@ -367,8 +370,9 @@

{L"\x4f60\x597d", "\xe4\xbd\xa0\xe5\xa5\xbd", true},

// Test a non-BMP character.

{L"\xd800\xdf00", "\xF0\x90\x8C\x80", true},

- // Invalid Unicode code point.

- {L"\xffffHello", "Hello", false},

+ // Non-characters are passed through.

+ {L"\xffffHello", "\xEF\xBF\xBFHello", true},

+ {L"\xdbff\xdffeHello", "\xF4\x8F\xBF\xBEHello", true},

// The first character is a truncated UTF-16 character.

{L"\xd800\x597d", "\xe5\xa5\xbd", false},

// Truncated at the end.

@@ -389,7 +393,7 @@

#elif defined(WCHAR_T_IS_UTF32)

// This test is only valid when wchar_t == UTF-32.

TEST(StringUtilTest, ConvertUTF32ToUTF8) {

- struct UTF8ToWideCase {

+ struct WideToUTF8Case {

const wchar_t* utf32;

const char* utf8;

bool success;

@@ -398,11 +402,14 @@

{L"\x4f60\x597d", "\xe4\xbd\xa0\xe5\xa5\xbd", true},

// Test a non-BMP character.

{L"A\x10300z", "A\xF0\x90\x8C\x80z", true},

+ // Non-characters are passed through.

+ {L"\xffffHello", "\xEF\xBF\xBFHello", true},

+ {L"\x10fffeHello", "\xF4\x8F\xBF\xBEHello", true},

// Invalid Unicode code points.

- {L"\xffffHello", "Hello", false},

{L"\xfffffffHello", "Hello", false},

// The first character is a truncated UTF-16 character.

{L"\xd800\x597d", "\xe5\xa5\xbd", false},

+ {L"\xdc01Hello", "Hello", false},

};

for (size_t i = 0; i < ARRAYSIZE_UNSAFE(convert_cases); i++) {

« no previous file with comments | « base/string_util_icu.cc ('k') | no next file » | no next file with comments »