base/i18n/icu_string_conversions_unittest.cc - Issue 1141793003: Update from https://crrev.com/329939

Side by Side Diff: base/i18n/icu_string_conversions_unittest.cc

Issue 1141793003: Update from https://crrev.com/329939 (Closed) Base URL: git@github.com:domokit/mojo.git@master

Patch Set: Created 5 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include <math.h>	5 #include <math.h>

6 #include <stdarg.h>	6 #include <stdarg.h>

7	7

8 #include <limits>	8 #include <limits>

9 #include <sstream>	9 #include <sstream>

10	10

(...skipping 24 matching lines...) Expand all Loading...
35 #elif defined(WCHAR_T_IS_UTF32)	35 #elif defined(WCHAR_T_IS_UTF32)

36 string16 u16;	36 string16 u16;

37 while (*s != 0) {	37 while (*s != 0) {

38 DCHECK_LE(static_cast<unsigned int>(*s), 0xFFFFu);	38 DCHECK_LE(static_cast<unsigned int>(*s), 0xFFFFu);

39 u16.push_back(*s++);	39 u16.push_back(*s++);

40 }	40 }

41 return u16;	41 return u16;

42 #endif	42 #endif

43 }	43 }

44	44

45 const wchar_t* const kConvertRoundtripCases[] = {

46 L"Google Video",

47 // "网页图片资讯更多 »"

48 L"\x7f51\x9875\x0020\x56fe\x7247\x0020\x8d44\x8baf\x66f4\x591a\x0020\x00bb",

49 // "Παγκόσμιος Ιστός"

50 L"\x03a0\x03b1\x03b3\x03ba\x03cc\x03c3\x03bc\x03b9"

51 L"\x03bf\x03c2\x0020\x0399\x03c3\x03c4\x03cc\x03c2",

52 // "Поиск страниц на русском"

53 L"\x041f\x043e\x0438\x0441\x043a\x0020\x0441\x0442"

54 L"\x0440\x0430\x043d\x0438\x0446\x0020\x043d\x0430"

55 L"\x0020\x0440\x0443\x0441\x0441\x043a\x043e\x043c",

56 // "전체서비스"

57 L"\xc804\xccb4\xc11c\xbe44\xc2a4",

58

59 // Test characters that take more than 16 bits. This will depend on whether

60 // wchar_t is 16 or 32 bits.

61 #if defined(WCHAR_T_IS_UTF16)

62 L"\xd800\xdf00",

63 // Five non-BMP characters, U+11D40 - U+11D44 (upstream label: Mathematical Alphanumeric Symbols A,B,C,D,E; the literal glyphs were lost to mis-encoding).

64 L"\xd807\xdd40\xd807\xdd41\xd807\xdd42\xd807\xdd43\xd807\xdd44",

65 #elif defined(WCHAR_T_IS_UTF32)

66 L"\x10300",

67 // Five non-BMP characters, U+11D40 - U+11D44 (upstream label: Mathematical Alphanumeric Symbols A,B,C,D,E; the literal glyphs were lost to mis-encoding).

68 L"\x11d40\x11d41\x11d42\x11d43\x11d44",

69 #endif

70 };

71

72 } // namespace	45 } // namespace

73	46

74 TEST(ICUStringConversionsTest, ConvertCodepageUTF8) {

75 // Make sure WideToCodepage works like WideToUTF8.

76 for (size_t i = 0; i < arraysize(kConvertRoundtripCases); ++i) {

77 SCOPED_TRACE(base::StringPrintf("Test[%" PRIuS "]: %ls",

78 i, kConvertRoundtripCases[i]));

79

80 std::string expected(WideToUTF8(kConvertRoundtripCases[i]));

81 std::string utf8;

82 EXPECT_TRUE(WideToCodepage(kConvertRoundtripCases[i], kCodepageUTF8,

83 OnStringConversionError::SKIP, &utf8));

84 EXPECT_EQ(expected, utf8);

85 }

86 }

87

88 // kConvertCodepageCases is not comprehensive. There are a number of cases	47 // kConvertCodepageCases is not comprehensive. There are a number of cases

89 // to add if we really want to have a comprehensive coverage of various	48 // to add if we really want to have a comprehensive coverage of various

90 // codepages and their 'idiosyncrasies'. Currently, the only implementation	49 // codepages and their 'idiosyncrasies'. Currently, the only implementation

91 // for CodepageTo* and *ToCodepage uses ICU, which has a very extensive	50 // for CodepageTo* and *ToCodepage uses ICU, which has a very extensive

92 // set of tests for the charset conversion. So, we can get away with a	51 // set of tests for the charset conversion. So, we can get away with a

93 // relatively small number of cases listed below.	52 // relatively small number of cases listed below.

94 //	53 //

95 // Note about |u16_wide| in the following struct.	54 // Note about |u16_wide| in the following struct.

96 // On Windows, the field is always identical to |wide|. On Mac and Linux,	55 // On Windows, the field is always identical to |wide|. On Mac and Linux,

97 // it's identical as long as there's no character outside the	56 // it's identical as long as there's no character outside the

(...skipping 128 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
226 // Thai (windows-874)	185 // Thai (windows-874)

227 {"windows-874",	186 {"windows-874",

228 "\xCA\xC7\xD1\xCA\xB4\xD5" "\xA4\xC3\xD1\xBA",	187 "\xCA\xC7\xD1\xCA\xB4\xD5" "\xA4\xC3\xD1\xBA",

229 OnStringConversionError::FAIL,	188 OnStringConversionError::FAIL,

230 true,	189 true,

231 L"\x0E2A\x0E27\x0E31\x0E2A\x0E14\x0E35"	190 L"\x0E2A\x0E27\x0E31\x0E2A\x0E14\x0E35"

232 L"\x0E04\x0E23\x0e31\x0E1A",	191 L"\x0E04\x0E23\x0e31\x0E1A",

233 NULL},	192 NULL},

234 };	193 };

235	194

236 TEST(ICUStringConversionsTest, ConvertBetweenCodepageAndWide) {

237 for (size_t i = 0; i < arraysize(kConvertCodepageCases); ++i) {

238 SCOPED_TRACE(base::StringPrintf(

239 "Test[%" PRIuS "]: <encoded: %s> <codepage: %s>", i,

240 kConvertCodepageCases[i].encoded,

241 kConvertCodepageCases[i].codepage_name));

242

243 std::wstring wide;

244 bool success = CodepageToWide(kConvertCodepageCases[i].encoded,

245 kConvertCodepageCases[i].codepage_name,

246 kConvertCodepageCases[i].on_error,

247 &wide);

248 EXPECT_EQ(kConvertCodepageCases[i].success, success);

249 EXPECT_EQ(kConvertCodepageCases[i].wide, wide);

250

251 // When decoding was successful and nothing was skipped, we also check the

252 // reverse conversion. Not all conversions are round-trippable, but

253 // kConvertCodepageCases does not have any one-way conversion at the

254 // moment.

255 if (success &&

256 kConvertCodepageCases[i].on_error ==

257 OnStringConversionError::FAIL) {

258 std::string encoded;

259 success = WideToCodepage(wide, kConvertCodepageCases[i].codepage_name,

260 kConvertCodepageCases[i].on_error, &encoded);

261 EXPECT_EQ(kConvertCodepageCases[i].success, success);

262 EXPECT_EQ(kConvertCodepageCases[i].encoded, encoded);

263 }

264 }

265

266 // The above cases handled codepage->wide errors, but not wide->codepage.

267 // Test that here.

268 std::string encoded("Temp data"); // Make sure the string gets cleared.

269

270 // First test going to an encoding that can not represent that character.

271 EXPECT_FALSE(WideToCodepage(L"Chinese\xff27", "iso-8859-1",

272 OnStringConversionError::FAIL, &encoded));

273 EXPECT_TRUE(encoded.empty());

274 EXPECT_TRUE(WideToCodepage(L"Chinese\xff27", "iso-8859-1",

275 OnStringConversionError::SKIP, &encoded));

276 EXPECT_STREQ("Chinese", encoded.c_str());

277 // From Unicode, SUBSTITUTE is the same as SKIP for now.

278 EXPECT_TRUE(WideToCodepage(L"Chinese\xff27", "iso-8859-1",

279 OnStringConversionError::SUBSTITUTE,

280 &encoded));

281 EXPECT_STREQ("Chinese", encoded.c_str());

282

283 #if defined(WCHAR_T_IS_UTF16)

284 // When we're in UTF-16 mode, test an invalid UTF-16 character in the input.

285 EXPECT_FALSE(WideToCodepage(L"a\xd800z", "iso-8859-1",

286 OnStringConversionError::FAIL, &encoded));

287 EXPECT_TRUE(encoded.empty());

288 EXPECT_TRUE(WideToCodepage(L"a\xd800z", "iso-8859-1",

289 OnStringConversionError::SKIP, &encoded));

290 EXPECT_STREQ("az", encoded.c_str());

291 #endif // WCHAR_T_IS_UTF16

292

293 // Invalid characters should be dropped when the error mode is SKIP.

294 EXPECT_TRUE(WideToCodepage(L"a\xffffz", "iso-8859-1",

295 OnStringConversionError::SKIP, &encoded));

296 EXPECT_STREQ("az", encoded.c_str());

297

298 // Invalid codepages should fail.

299 EXPECT_FALSE(WideToCodepage(L"Hello, world", "awesome-8571-2",

300 OnStringConversionError::SKIP, &encoded));

301 }

302

303 TEST(ICUStringConversionsTest, ConvertBetweenCodepageAndUTF16) {	195 TEST(ICUStringConversionsTest, ConvertBetweenCodepageAndUTF16) {

304 for (size_t i = 0; i < arraysize(kConvertCodepageCases); ++i) {	196 for (size_t i = 0; i < arraysize(kConvertCodepageCases); ++i) {

305 SCOPED_TRACE(base::StringPrintf(	197 SCOPED_TRACE(base::StringPrintf(

306 "Test[%" PRIuS "]: <encoded: %s> <codepage: %s>", i,	198 "Test[%" PRIuS "]: <encoded: %s> <codepage: %s>", i,

307 kConvertCodepageCases[i].encoded,	199 kConvertCodepageCases[i].encoded,

308 kConvertCodepageCases[i].codepage_name));	200 kConvertCodepageCases[i].codepage_name));

309	201

310 string16 utf16;	202 string16 utf16;

311 bool success = CodepageToUTF16(kConvertCodepageCases[i].encoded,	203 bool success = CodepageToUTF16(kConvertCodepageCases[i].encoded,

312 kConvertCodepageCases[i].codepage_name,	204 kConvertCodepageCases[i].codepage_name,

(...skipping 51 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
364	256

365 bool success = ConvertToUtf8AndNormalize(	257 bool success = ConvertToUtf8AndNormalize(

366 kConvertAndNormalizeCases[i].encoded,	258 kConvertAndNormalizeCases[i].encoded,

367 kConvertAndNormalizeCases[i].codepage_name, &result);	259 kConvertAndNormalizeCases[i].codepage_name, &result);

368 EXPECT_EQ(kConvertAndNormalizeCases[i].expected_success, success);	260 EXPECT_EQ(kConvertAndNormalizeCases[i].expected_success, success);

369 EXPECT_EQ(kConvertAndNormalizeCases[i].expected_value, result);	261 EXPECT_EQ(kConvertAndNormalizeCases[i].expected_value, result);

370 }	262 }

371 }	263 }

372	264

373 } // namespace base	265 } // namespace base

OLD	NEW

« no previous file with comments | « base/i18n/icu_string_conversions.cc ('k') | base/i18n/icu_util.h » ('j') | no next file with comments »