Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1381)

Side by Side Diff: base/i18n/icu_string_conversions_unittest.cc

Issue 1141793003: Update from https://crrev.com/329939 (Closed) Base URL: git@github.com:domokit/mojo.git@master
Patch Set: Created 5 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « base/i18n/icu_string_conversions.cc ('k') | base/i18n/icu_util.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include <math.h> 5 #include <math.h>
6 #include <stdarg.h> 6 #include <stdarg.h>
7 7
8 #include <limits> 8 #include <limits>
9 #include <sstream> 9 #include <sstream>
10 10
(...skipping 24 matching lines...) Expand all
35 #elif defined(WCHAR_T_IS_UTF32) 35 #elif defined(WCHAR_T_IS_UTF32)
36 string16 u16; 36 string16 u16;
37 while (*s != 0) { 37 while (*s != 0) {
38 DCHECK_LE(static_cast<unsigned int>(*s), 0xFFFFu); 38 DCHECK_LE(static_cast<unsigned int>(*s), 0xFFFFu);
39 u16.push_back(*s++); 39 u16.push_back(*s++);
40 } 40 }
41 return u16; 41 return u16;
42 #endif 42 #endif
43 } 43 }
44 44
45 const wchar_t* const kConvertRoundtripCases[] = {
46 L"Google Video",
47 // "网页 图片 资讯更多 »"
48 L"\x7f51\x9875\x0020\x56fe\x7247\x0020\x8d44\x8baf\x66f4\x591a\x0020\x00bb",
49 // "Παγκόσμιος Ιστός"
50 L"\x03a0\x03b1\x03b3\x03ba\x03cc\x03c3\x03bc\x03b9"
51 L"\x03bf\x03c2\x0020\x0399\x03c3\x03c4\x03cc\x03c2",
52 // "Поиск страниц на русском"
53 L"\x041f\x043e\x0438\x0441\x043a\x0020\x0441\x0442"
54 L"\x0440\x0430\x043d\x0438\x0446\x0020\x043d\x0430"
55 L"\x0020\x0440\x0443\x0441\x0441\x043a\x043e\x043c",
56 // "전체서비스"
57 L"\xc804\xccb4\xc11c\xbe44\xc2a4",
58
59 // Test characters that take more than 16 bits. This will depend on whether
60 // wchar_t is 16 or 32 bits.
61 #if defined(WCHAR_T_IS_UTF16)
62 L"\xd800\xdf00",
63 // Five non-BMP characters, U+11D40 - U+11D44, written as UTF-16 surrogate pairs (labelled here as Mathematical Alphanumeric Symbols A,B,C,D,E).
64 L"\xd807\xdd40\xd807\xdd41\xd807\xdd42\xd807\xdd43\xd807\xdd44",
65 #elif defined(WCHAR_T_IS_UTF32)
66 L"\x10300",
67 // Five non-BMP characters, U+11D40 - U+11D44, written as single UTF-32 code units (labelled here as Mathematical Alphanumeric Symbols A,B,C,D,E).
68 L"\x11d40\x11d41\x11d42\x11d43\x11d44",
69 #endif
70 };
71
72 } // namespace 45 } // namespace
73 46
74 TEST(ICUStringConversionsTest, ConvertCodepageUTF8) {
75 // Make sure WideToCodepage works like WideToUTF8.
76 for (size_t i = 0; i < arraysize(kConvertRoundtripCases); ++i) {
77 SCOPED_TRACE(base::StringPrintf("Test[%" PRIuS "]: %ls",
78 i, kConvertRoundtripCases[i]));
79
80 std::string expected(WideToUTF8(kConvertRoundtripCases[i]));
81 std::string utf8;
82 EXPECT_TRUE(WideToCodepage(kConvertRoundtripCases[i], kCodepageUTF8,
83 OnStringConversionError::SKIP, &utf8));
84 EXPECT_EQ(expected, utf8);
85 }
86 }
87
88 // kConvertCodepageCases is not comprehensive. There are a number of cases 47 // kConvertCodepageCases is not comprehensive. There are a number of cases
89 // to add if we really want to have a comprehensive coverage of various 48 // to add if we really want to have a comprehensive coverage of various
90 // codepages and their 'idiosyncrasies'. Currently, the only implementation 49 // codepages and their 'idiosyncrasies'. Currently, the only implementation
91 // for CodepageTo* and *ToCodepage uses ICU, which has a very extensive 50 // for CodepageTo* and *ToCodepage uses ICU, which has a very extensive
92 // set of tests for the charset conversion. So, we can get away with a 51 // set of tests for the charset conversion. So, we can get away with a
93 // relatively small number of cases listed below. 52 // relatively small number of cases listed below.
94 // 53 //
95 // Note about |u16_wide| in the following struct. 54 // Note about |u16_wide| in the following struct.
96 // On Windows, the field is always identical to |wide|. On Mac and Linux, 55 // On Windows, the field is always identical to |wide|. On Mac and Linux,
97 // it's identical as long as there's no character outside the 56 // it's identical as long as there's no character outside the
(...skipping 128 matching lines...) Expand 10 before | Expand all | Expand 10 after
226 // Thai (windows-874) 185 // Thai (windows-874)
227 {"windows-874", 186 {"windows-874",
228 "\xCA\xC7\xD1\xCA\xB4\xD5" "\xA4\xC3\xD1\xBA", 187 "\xCA\xC7\xD1\xCA\xB4\xD5" "\xA4\xC3\xD1\xBA",
229 OnStringConversionError::FAIL, 188 OnStringConversionError::FAIL,
230 true, 189 true,
231 L"\x0E2A\x0E27\x0E31\x0E2A\x0E14\x0E35" 190 L"\x0E2A\x0E27\x0E31\x0E2A\x0E14\x0E35"
232 L"\x0E04\x0E23\x0e31\x0E1A", 191 L"\x0E04\x0E23\x0e31\x0E1A",
233 NULL}, 192 NULL},
234 }; 193 };
235 194
236 TEST(ICUStringConversionsTest, ConvertBetweenCodepageAndWide) {
237 for (size_t i = 0; i < arraysize(kConvertCodepageCases); ++i) {
238 SCOPED_TRACE(base::StringPrintf(
239 "Test[%" PRIuS "]: <encoded: %s> <codepage: %s>", i,
240 kConvertCodepageCases[i].encoded,
241 kConvertCodepageCases[i].codepage_name));
242
243 std::wstring wide;
244 bool success = CodepageToWide(kConvertCodepageCases[i].encoded,
245 kConvertCodepageCases[i].codepage_name,
246 kConvertCodepageCases[i].on_error,
247 &wide);
248 EXPECT_EQ(kConvertCodepageCases[i].success, success);
249 EXPECT_EQ(kConvertCodepageCases[i].wide, wide);
250
251 // When decoding was successful and nothing was skipped, we also check the
252 // reverse conversion. Not all conversions are round-trippable, but
253 // kConvertCodepageCases does not have any one-way conversion at the
254 // moment.
255 if (success &&
256 kConvertCodepageCases[i].on_error ==
257 OnStringConversionError::FAIL) {
258 std::string encoded;
259 success = WideToCodepage(wide, kConvertCodepageCases[i].codepage_name,
260 kConvertCodepageCases[i].on_error, &encoded);
261 EXPECT_EQ(kConvertCodepageCases[i].success, success);
262 EXPECT_EQ(kConvertCodepageCases[i].encoded, encoded);
263 }
264 }
265
266 // The above cases handled codepage->wide errors, but not wide->codepage.
267 // Test that here.
268 std::string encoded("Temp data"); // Make sure the string gets cleared.
269
270 // First test going to an encoding (iso-8859-1) that cannot represent the trailing character (U+FF27).
271 EXPECT_FALSE(WideToCodepage(L"Chinese\xff27", "iso-8859-1",
272 OnStringConversionError::FAIL, &encoded));
273 EXPECT_TRUE(encoded.empty());
274 EXPECT_TRUE(WideToCodepage(L"Chinese\xff27", "iso-8859-1",
275 OnStringConversionError::SKIP, &encoded));
276 EXPECT_STREQ("Chinese", encoded.c_str());
277 // From Unicode, SUBSTITUTE is the same as SKIP for now.
278 EXPECT_TRUE(WideToCodepage(L"Chinese\xff27", "iso-8859-1",
279 OnStringConversionError::SUBSTITUTE,
280 &encoded));
281 EXPECT_STREQ("Chinese", encoded.c_str());
282
283 #if defined(WCHAR_T_IS_UTF16)
284 // When we're in UTF-16 mode, test an invalid UTF-16 character in the input.
285 EXPECT_FALSE(WideToCodepage(L"a\xd800z", "iso-8859-1",
286 OnStringConversionError::FAIL, &encoded));
287 EXPECT_TRUE(encoded.empty());
288 EXPECT_TRUE(WideToCodepage(L"a\xd800z", "iso-8859-1",
289 OnStringConversionError::SKIP, &encoded));
290 EXPECT_STREQ("az", encoded.c_str());
291 #endif // WCHAR_T_IS_UTF16
292
293 // Invalid characters should be skipped (conversion still succeeds) when SKIP is requested.
294 EXPECT_TRUE(WideToCodepage(L"a\xffffz", "iso-8859-1",
295 OnStringConversionError::SKIP, &encoded));
296 EXPECT_STREQ("az", encoded.c_str());
297
298 // Invalid codepages should fail.
299 EXPECT_FALSE(WideToCodepage(L"Hello, world", "awesome-8571-2",
300 OnStringConversionError::SKIP, &encoded));
301 }
302
303 TEST(ICUStringConversionsTest, ConvertBetweenCodepageAndUTF16) { 195 TEST(ICUStringConversionsTest, ConvertBetweenCodepageAndUTF16) {
304 for (size_t i = 0; i < arraysize(kConvertCodepageCases); ++i) { 196 for (size_t i = 0; i < arraysize(kConvertCodepageCases); ++i) {
305 SCOPED_TRACE(base::StringPrintf( 197 SCOPED_TRACE(base::StringPrintf(
306 "Test[%" PRIuS "]: <encoded: %s> <codepage: %s>", i, 198 "Test[%" PRIuS "]: <encoded: %s> <codepage: %s>", i,
307 kConvertCodepageCases[i].encoded, 199 kConvertCodepageCases[i].encoded,
308 kConvertCodepageCases[i].codepage_name)); 200 kConvertCodepageCases[i].codepage_name));
309 201
310 string16 utf16; 202 string16 utf16;
311 bool success = CodepageToUTF16(kConvertCodepageCases[i].encoded, 203 bool success = CodepageToUTF16(kConvertCodepageCases[i].encoded,
312 kConvertCodepageCases[i].codepage_name, 204 kConvertCodepageCases[i].codepage_name,
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after
364 256
365 bool success = ConvertToUtf8AndNormalize( 257 bool success = ConvertToUtf8AndNormalize(
366 kConvertAndNormalizeCases[i].encoded, 258 kConvertAndNormalizeCases[i].encoded,
367 kConvertAndNormalizeCases[i].codepage_name, &result); 259 kConvertAndNormalizeCases[i].codepage_name, &result);
368 EXPECT_EQ(kConvertAndNormalizeCases[i].expected_success, success); 260 EXPECT_EQ(kConvertAndNormalizeCases[i].expected_success, success);
369 EXPECT_EQ(kConvertAndNormalizeCases[i].expected_value, result); 261 EXPECT_EQ(kConvertAndNormalizeCases[i].expected_value, result);
370 } 262 }
371 } 263 }
372 264
373 } // namespace base 265 } // namespace base
OLDNEW
« no previous file with comments | « base/i18n/icu_string_conversions.cc ('k') | base/i18n/icu_util.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698