Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(107)

Side by Side Diff: base/utf_string_conversions_unittest.cc

Issue 522029: If we can't read a unicode character, write the standard "unknown" (0xFFFD) c... (Closed) Base URL: svn://chrome-svn.corp.google.com/chrome/trunk/src/
Patch Set: '' Created 10 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « base/utf_string_conversions.cc ('k') | chrome/common/zip_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2009 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "base/basictypes.h" 5 #include "base/basictypes.h"
6 #include "base/string_util.h" 6 #include "base/string_util.h"
7 #include "testing/gtest/include/gtest/gtest.h" 7 #include "testing/gtest/include/gtest/gtest.h"
8 8
9 namespace base { 9 namespace base {
10 10
(...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after
87 struct UTF8ToWideCase { 87 struct UTF8ToWideCase {
88 const char* utf8; 88 const char* utf8;
89 const wchar_t* wide; 89 const wchar_t* wide;
90 bool success; 90 bool success;
91 } convert_cases[] = { 91 } convert_cases[] = {
92 // Regular UTF-8 input. 92 // Regular UTF-8 input.
93 {"\xe4\xbd\xa0\xe5\xa5\xbd", L"\x4f60\x597d", true}, 93 {"\xe4\xbd\xa0\xe5\xa5\xbd", L"\x4f60\x597d", true},
94 // Non-character is passed through. 94 // Non-character is passed through.
95 {"\xef\xbf\xbfHello", L"\xffffHello", true}, 95 {"\xef\xbf\xbfHello", L"\xffffHello", true},
96 // Truncated UTF-8 sequence. 96 // Truncated UTF-8 sequence.
97 {"\xe4\xa0\xe5\xa5\xbd", L"\x597d", false}, 97 {"\xe4\xa0\xe5\xa5\xbd", L"\xfffd\x597d", false},
98 // Truncated off the end. 98 // Truncated off the end.
99 {"\xe5\xa5\xbd\xe4\xa0", L"\x597d", false}, 99 {"\xe5\xa5\xbd\xe4\xa0", L"\x597d\xfffd", false},
100 // Non-shortest-form UTF-8. 100 // Non-shortest-form UTF-8.
101 {"\xf0\x84\xbd\xa0\xe5\xa5\xbd", L"\x597d", false}, 101 {"\xf0\x84\xbd\xa0\xe5\xa5\xbd", L"\xfffd\x597d", false},
102 // This UTF-8 character decodes to a UTF-16 surrogate, which is illegal. 102 // This UTF-8 character decodes to a UTF-16 surrogate, which is illegal.
103 {"\xed\xb0\x80", L"", false}, 103 {"\xed\xb0\x80", L"\xfffd", false},
104 // Non-BMP characters. The second is a non-character regarded as valid. 104 // Non-BMP characters. The second is a non-character regarded as valid.
105 // The result will either be in UTF-16 or UTF-32. 105 // The result will either be in UTF-16 or UTF-32.
106 #if defined(WCHAR_T_IS_UTF16) 106 #if defined(WCHAR_T_IS_UTF16)
107 {"A\xF0\x90\x8C\x80z", L"A\xd800\xdf00z", true}, 107 {"A\xF0\x90\x8C\x80z", L"A\xd800\xdf00z", true},
108 {"A\xF4\x8F\xBF\xBEz", L"A\xdbff\xdffez", true}, 108 {"A\xF4\x8F\xBF\xBEz", L"A\xdbff\xdffez", true},
109 #elif defined(WCHAR_T_IS_UTF32) 109 #elif defined(WCHAR_T_IS_UTF32)
110 {"A\xF0\x90\x8C\x80z", L"A\x10300z", true}, 110 {"A\xF0\x90\x8C\x80z", L"A\x10300z", true},
111 {"A\xF4\x8F\xBF\xBEz", L"A\x10fffez", true}, 111 {"A\xF4\x8F\xBF\xBEz", L"A\x10fffez", true},
112 #endif 112 #endif
113 }; 113 };
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after
145 bool success; 145 bool success;
146 } convert_cases[] = { 146 } convert_cases[] = {
147 // Regular UTF-16 input. 147 // Regular UTF-16 input.
148 {L"\x4f60\x597d", "\xe4\xbd\xa0\xe5\xa5\xbd", true}, 148 {L"\x4f60\x597d", "\xe4\xbd\xa0\xe5\xa5\xbd", true},
149 // Test a non-BMP character. 149 // Test a non-BMP character.
150 {L"\xd800\xdf00", "\xF0\x90\x8C\x80", true}, 150 {L"\xd800\xdf00", "\xF0\x90\x8C\x80", true},
151 // Non-characters are passed through. 151 // Non-characters are passed through.
152 {L"\xffffHello", "\xEF\xBF\xBFHello", true}, 152 {L"\xffffHello", "\xEF\xBF\xBFHello", true},
153 {L"\xdbff\xdffeHello", "\xF4\x8F\xBF\xBEHello", true}, 153 {L"\xdbff\xdffeHello", "\xF4\x8F\xBF\xBEHello", true},
154 // The first character is a truncated UTF-16 character. 154 // The first character is a truncated UTF-16 character.
155 {L"\xd800\x597d", "\xe5\xa5\xbd", false}, 155 {L"\xd800\x597d", "\xef\xbf\xbd\xe5\xa5\xbd", false},
156 // Truncated at the end. 156 // Truncated at the end.
157 {L"\x597d\xd800", "\xe5\xa5\xbd", false}, 157 {L"\x597d\xd800", "\xe5\xa5\xbd\xef\xbf\xbd", false},
158 }; 158 };
159 159
160 for (int i = 0; i < arraysize(convert_cases); i++) { 160 for (int i = 0; i < arraysize(convert_cases); i++) {
161 std::string converted; 161 std::string converted;
162 EXPECT_EQ(convert_cases[i].success, 162 EXPECT_EQ(convert_cases[i].success,
163 WideToUTF8(convert_cases[i].utf16, 163 WideToUTF8(convert_cases[i].utf16,
164 wcslen(convert_cases[i].utf16), 164 wcslen(convert_cases[i].utf16),
165 &converted)); 165 &converted));
166 std::string expected(convert_cases[i].utf8); 166 std::string expected(convert_cases[i].utf8);
167 EXPECT_EQ(expected, converted); 167 EXPECT_EQ(expected, converted);
168 } 168 }
169 } 169 }
170 170
171 #elif defined(WCHAR_T_IS_UTF32) 171 #elif defined(WCHAR_T_IS_UTF32)
172 // This test is only valid when wchar_t == UTF-32. 172 // This test is only valid when wchar_t == UTF-32.
173 TEST(UTFStringConversionsTest, ConvertUTF32ToUTF8) { 173 TEST(UTFStringConversionsTest, ConvertUTF32ToUTF8) {
174 struct WideToUTF8Case { 174 struct WideToUTF8Case {
175 const wchar_t* utf32; 175 const wchar_t* utf32;
176 const char* utf8; 176 const char* utf8;
177 bool success; 177 bool success;
178 } convert_cases[] = { 178 } convert_cases[] = {
179 // Regular 16-bit input. 179 // Regular 16-bit input.
180 {L"\x4f60\x597d", "\xe4\xbd\xa0\xe5\xa5\xbd", true}, 180 {L"\x4f60\x597d", "\xe4\xbd\xa0\xe5\xa5\xbd", true},
181 // Test a non-BMP character. 181 // Test a non-BMP character.
182 {L"A\x10300z", "A\xF0\x90\x8C\x80z", true}, 182 {L"A\x10300z", "A\xF0\x90\x8C\x80z", true},
183 // Non-characters are passed through. 183 // Non-characters are passed through.
184 {L"\xffffHello", "\xEF\xBF\xBFHello", true}, 184 {L"\xffffHello", "\xEF\xBF\xBFHello", true},
185 {L"\x10fffeHello", "\xF4\x8F\xBF\xBEHello", true}, 185 {L"\x10fffeHello", "\xF4\x8F\xBF\xBEHello", true},
186 // Invalid Unicode code points. 186 // Invalid Unicode code points.
187 {L"\xfffffffHello", "Hello", false}, 187 {L"\xfffffffHello", "\xEF\xBF\xBDHello", false},
188 // The first character is a truncated UTF-16 character. 188 // The first character is a truncated UTF-16 character.
189 {L"\xd800\x597d", "\xe5\xa5\xbd", false}, 189 {L"\xd800\x597d", "\xef\xbf\xbd\xe5\xa5\xbd", false},
190 {L"\xdc01Hello", "Hello", false}, 190 {L"\xdc01Hello", "\xef\xbf\xbdHello", false},
191 }; 191 };
192 192
193 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(convert_cases); i++) { 193 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(convert_cases); i++) {
194 std::string converted; 194 std::string converted;
195 EXPECT_EQ(convert_cases[i].success, 195 EXPECT_EQ(convert_cases[i].success,
196 WideToUTF8(convert_cases[i].utf32, 196 WideToUTF8(convert_cases[i].utf32,
197 wcslen(convert_cases[i].utf32), 197 wcslen(convert_cases[i].utf32),
198 &converted)); 198 &converted));
199 std::string expected(convert_cases[i].utf8); 199 std::string expected(convert_cases[i].utf8);
200 EXPECT_EQ(expected, converted); 200 EXPECT_EQ(expected, converted);
(...skipping 19 matching lines...) Expand all
220 EXPECT_EQ(arraysize(wmulti) - 1, wmultistring.length()); 220 EXPECT_EQ(arraysize(wmulti) - 1, wmultistring.length());
221 std::string expected; 221 std::string expected;
222 memcpy(WriteInto(&expected, arraysize(multi)), multi, sizeof(multi)); 222 memcpy(WriteInto(&expected, arraysize(multi)), multi, sizeof(multi));
223 EXPECT_EQ(arraysize(multi) - 1, expected.length()); 223 EXPECT_EQ(arraysize(multi) - 1, expected.length());
224 const std::string& converted = WideToUTF8(wmultistring); 224 const std::string& converted = WideToUTF8(wmultistring);
225 EXPECT_EQ(arraysize(multi) - 1, converted.length()); 225 EXPECT_EQ(arraysize(multi) - 1, converted.length());
226 EXPECT_EQ(expected, converted); 226 EXPECT_EQ(expected, converted);
227 } 227 }
228 228
229 } // namespace base 229 } // namespace base
OLDNEW
« no previous file with comments | « base/utf_string_conversions.cc ('k') | chrome/common/zip_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698