OLD | NEW |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include <math.h> | 5 #include <math.h> |
6 #include <stdarg.h> | 6 #include <stdarg.h> |
7 | 7 |
8 #include <limits> | 8 #include <limits> |
9 #include <sstream> | 9 #include <sstream> |
10 | 10 |
(...skipping 24 matching lines...) Expand all Loading... |
35 #elif defined(WCHAR_T_IS_UTF32) | 35 #elif defined(WCHAR_T_IS_UTF32) |
36 string16 u16; | 36 string16 u16; |
37 while (*s != 0) { | 37 while (*s != 0) { |
38 DCHECK_LE(static_cast<unsigned int>(*s), 0xFFFFu); | 38 DCHECK_LE(static_cast<unsigned int>(*s), 0xFFFFu); |
39 u16.push_back(*s++); | 39 u16.push_back(*s++); |
40 } | 40 } |
41 return u16; | 41 return u16; |
42 #endif | 42 #endif |
43 } | 43 } |
44 | 44 |
// Wide-string inputs for the conversion tests. The entries holding characters | |
// above U+FFFF are spelled differently depending on the width of wchar_t. | |
45 const wchar_t* const kConvertRoundtripCases[] = { | |
46 L"Google Video", | |
47 // "网页 图片 资讯更多 »" | |
48 L"\x7f51\x9875\x0020\x56fe\x7247\x0020\x8d44\x8baf\x66f4\x591a\x0020\x00bb", | |
49 // "Παγκόσμιος Ιστός" | |
50 L"\x03a0\x03b1\x03b3\x03ba\x03cc\x03c3\x03bc\x03b9" | |
51 L"\x03bf\x03c2\x0020\x0399\x03c3\x03c4\x03cc\x03c2", | |
52 // "Поиск страниц на русском" | |
53 L"\x041f\x043e\x0438\x0441\x043a\x0020\x0441\x0442" | |
54 L"\x0440\x0430\x043d\x0438\x0446\x0020\x043d\x0430" | |
55 L"\x0020\x0440\x0443\x0441\x0441\x043a\x043e\x043c", | |
56 // "전체서비스" | |
57 L"\xc804\xccb4\xc11c\xbe44\xc2a4", | |
58 | |
59 // Test characters that take more than 16 bits. This will depend on whether | |
60 // wchar_t is 16 or 32 bits. | |
61 #if defined(WCHAR_T_IS_UTF16) | |
62 L"\xd800\xdf00", | |
63 // Five code points U+11D40 - U+11D44, written as UTF-16 surrogate pairs. | |
// NOTE(review): this was labelled "Mathematical Alphanumeric Symbols A-E", but | |
// that block starts at U+1D400 -- confirm which code points were intended. | |
64 L"\xd807\xdd40\xd807\xdd41\xd807\xdd42\xd807\xdd43\xd807\xdd44", | |
65 #elif defined(WCHAR_T_IS_UTF32) | |
66 L"\x10300", | |
67 // The same five code points U+11D40 - U+11D44, one wchar_t each. | |
// NOTE(review): this was labelled "Mathematical Alphanumeric Symbols A-E", but | |
// that block starts at U+1D400 -- confirm which code points were intended. | |
68 L"\x11d40\x11d41\x11d42\x11d43\x11d44", | |
69 #endif | |
70 }; | |
71 | |
72 } // namespace | 45 } // namespace |
73 | 46 |
// Checks that WideToCodepage() targeting kCodepageUTF8 produces the same string | |
// as WideToUTF8() for every entry of kConvertRoundtripCases. | |
74 TEST(ICUStringConversionsTest, ConvertCodepageUTF8) { | |
75 // Make sure WideToCodepage works like WideToUTF8. | |
76 for (size_t i = 0; i < arraysize(kConvertRoundtripCases); ++i) { | |
// Label any failure in this iteration with the case index and its text. | |
77 SCOPED_TRACE(base::StringPrintf("Test[%" PRIuS "]: %ls", | |
78 i, kConvertRoundtripCases[i])); | |
79 | |
// WideToUTF8() supplies the reference result the codepage path must match. | |
80 std::string expected(WideToUTF8(kConvertRoundtripCases[i])); | |
81 std::string utf8; | |
82 EXPECT_TRUE(WideToCodepage(kConvertRoundtripCases[i], kCodepageUTF8, | |
83 OnStringConversionError::SKIP, &utf8)); | |
84 EXPECT_EQ(expected, utf8); | |
85 } | |
86 } | |
87 | |
88 // kConvertCodepageCases is not comprehensive. There are a number of cases | 47 // kConvertCodepageCases is not comprehensive. There are a number of cases |
89 // to add if we really want to have comprehensive coverage of various | 48 // to add if we really want to have comprehensive coverage of various |
90 // codepages and their 'idiosyncrasies'. Currently, the only implementation | 49 // codepages and their 'idiosyncrasies'. Currently, the only implementation |
91 // for CodepageTo* and *ToCodepage uses ICU, which has a very extensive | 50 // for CodepageTo* and *ToCodepage uses ICU, which has a very extensive |
92 // set of tests for the charset conversion. So, we can get away with a | 51 // set of tests for the charset conversion. So, we can get away with a |
93 // relatively small number of cases listed below. | 52 // relatively small number of cases listed below. |
94 // | 53 // |
95 // Note about |u16_wide| in the following struct. | 54 // Note about |u16_wide| in the following struct. |
96 // On Windows, the field is always identical to |wide|. On Mac and Linux, | 55 // On Windows, the field is always identical to |wide|. On Mac and Linux, |
97 // it's identical as long as there's no character outside the | 56 // it's identical as long as there's no character outside the |
(...skipping 128 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
226 // Thai (windows-874) | 185 // Thai (windows-874) |
227 {"windows-874", | 186 {"windows-874", |
228 "\xCA\xC7\xD1\xCA\xB4\xD5" "\xA4\xC3\xD1\xBA", | 187 "\xCA\xC7\xD1\xCA\xB4\xD5" "\xA4\xC3\xD1\xBA", |
229 OnStringConversionError::FAIL, | 188 OnStringConversionError::FAIL, |
230 true, | 189 true, |
231 L"\x0E2A\x0E27\x0E31\x0E2A\x0E14\x0E35" | 190 L"\x0E2A\x0E27\x0E31\x0E2A\x0E14\x0E35" |
232 L"\x0E04\x0E23\x0e31\x0E1A", | 191 L"\x0E04\x0E23\x0e31\x0E1A", |
233 NULL}, | 192 NULL}, |
234 }; | 193 }; |
235 | 194 |
// Decodes every kConvertCodepageCases entry with CodepageToWide() and, where the | |
// decode succeeded under FAIL, re-encodes it with WideToCodepage(). Afterwards | |
// exercises WideToCodepage() error handling: unrepresentable characters, invalid | |
// UTF-16 input, an invalid character and an unknown codepage name. | |
236 TEST(ICUStringConversionsTest, ConvertBetweenCodepageAndWide) { | |
237 for (size_t i = 0; i < arraysize(kConvertCodepageCases); ++i) { | |
238 SCOPED_TRACE(base::StringPrintf( | |
239 "Test[%" PRIuS "]: <encoded: %s> <codepage: %s>", i, | |
240 kConvertCodepageCases[i].encoded, | |
241 kConvertCodepageCases[i].codepage_name)); | |
242 | |
243 std::wstring wide; | |
244 bool success = CodepageToWide(kConvertCodepageCases[i].encoded, | |
245 kConvertCodepageCases[i].codepage_name, | |
246 kConvertCodepageCases[i].on_error, | |
247 &wide); | |
248 EXPECT_EQ(kConvertCodepageCases[i].success, success); | |
249 EXPECT_EQ(kConvertCodepageCases[i].wide, wide); | |
250 | |
251 // When decoding was successful and nothing was skipped, we also check the | |
252 // reverse conversion. Not all conversions are round-trippable, but | |
253 // kConvertCodepageCases does not have any one-way conversion at the | |
254 // moment. | |
255 if (success && | |
256 kConvertCodepageCases[i].on_error == | |
257 OnStringConversionError::FAIL) { | |
258 std::string encoded; | |
259 success = WideToCodepage(wide, kConvertCodepageCases[i].codepage_name, | |
260 kConvertCodepageCases[i].on_error, &encoded); | |
261 EXPECT_EQ(kConvertCodepageCases[i].success, success); | |
262 EXPECT_EQ(kConvertCodepageCases[i].encoded, encoded); | |
263 } | |
264 } | |
265 | |
266 // The above cases handled codepage->wide errors, but not wide->codepage. | |
267 // Test that here. | |
268 std::string encoded("Temp data"); // Make sure the string gets cleared. | |
269 | |
270 // First test going to an encoding that can not represent that character. | |
271 EXPECT_FALSE(WideToCodepage(L"Chinese\xff27", "iso-8859-1", | |
272 OnStringConversionError::FAIL, &encoded)); | |
273 EXPECT_TRUE(encoded.empty()); | |
274 EXPECT_TRUE(WideToCodepage(L"Chinese\xff27", "iso-8859-1", | |
275 OnStringConversionError::SKIP, &encoded)); | |
276 EXPECT_STREQ("Chinese", encoded.c_str()); | |
277 // From Unicode, SUBSTITUTE is the same as SKIP for now. | |
278 EXPECT_TRUE(WideToCodepage(L"Chinese\xff27", "iso-8859-1", | |
279 OnStringConversionError::SUBSTITUTE, | |
280 &encoded)); | |
281 EXPECT_STREQ("Chinese", encoded.c_str()); | |
282 | |
283 #if defined(WCHAR_T_IS_UTF16) | |
284 // When we're in UTF-16 mode, test an invalid UTF-16 character in the input. | |
285 EXPECT_FALSE(WideToCodepage(L"a\xd800z", "iso-8859-1", | |
286 OnStringConversionError::FAIL, &encoded)); | |
287 EXPECT_TRUE(encoded.empty()); | |
288 EXPECT_TRUE(WideToCodepage(L"a\xd800z", "iso-8859-1", | |
289 OnStringConversionError::SKIP, &encoded)); | |
290 EXPECT_STREQ("az", encoded.c_str()); | |
291 #endif // WCHAR_T_IS_UTF16 | |
292 | |
293 // An invalid character (0xFFFF) is dropped under SKIP; the call still succeeds. | |
294 EXPECT_TRUE(WideToCodepage(L"a\xffffz", "iso-8859-1", | |
295 OnStringConversionError::SKIP, &encoded)); | |
296 EXPECT_STREQ("az", encoded.c_str()); | |
297 | |
298 // Invalid codepages should fail. | |
299 EXPECT_FALSE(WideToCodepage(L"Hello, world", "awesome-8571-2", | |
300 OnStringConversionError::SKIP, &encoded)); | |
301 } | |
302 | |
303 TEST(ICUStringConversionsTest, ConvertBetweenCodepageAndUTF16) { | 195 TEST(ICUStringConversionsTest, ConvertBetweenCodepageAndUTF16) { |
304 for (size_t i = 0; i < arraysize(kConvertCodepageCases); ++i) { | 196 for (size_t i = 0; i < arraysize(kConvertCodepageCases); ++i) { |
305 SCOPED_TRACE(base::StringPrintf( | 197 SCOPED_TRACE(base::StringPrintf( |
306 "Test[%" PRIuS "]: <encoded: %s> <codepage: %s>", i, | 198 "Test[%" PRIuS "]: <encoded: %s> <codepage: %s>", i, |
307 kConvertCodepageCases[i].encoded, | 199 kConvertCodepageCases[i].encoded, |
308 kConvertCodepageCases[i].codepage_name)); | 200 kConvertCodepageCases[i].codepage_name)); |
309 | 201 |
310 string16 utf16; | 202 string16 utf16; |
311 bool success = CodepageToUTF16(kConvertCodepageCases[i].encoded, | 203 bool success = CodepageToUTF16(kConvertCodepageCases[i].encoded, |
312 kConvertCodepageCases[i].codepage_name, | 204 kConvertCodepageCases[i].codepage_name, |
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
364 | 256 |
365 bool success = ConvertToUtf8AndNormalize( | 257 bool success = ConvertToUtf8AndNormalize( |
366 kConvertAndNormalizeCases[i].encoded, | 258 kConvertAndNormalizeCases[i].encoded, |
367 kConvertAndNormalizeCases[i].codepage_name, &result); | 259 kConvertAndNormalizeCases[i].codepage_name, &result); |
368 EXPECT_EQ(kConvertAndNormalizeCases[i].expected_success, success); | 260 EXPECT_EQ(kConvertAndNormalizeCases[i].expected_success, success); |
369 EXPECT_EQ(kConvertAndNormalizeCases[i].expected_value, result); | 261 EXPECT_EQ(kConvertAndNormalizeCases[i].expected_value, result); |
370 } | 262 } |
371 } | 263 } |
372 | 264 |
373 } // namespace base | 265 } // namespace base |
OLD | NEW |