Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(21)

Side by Side Diff: base/string_util_unittest.cc

Issue 147038: Pass through non-character codepoints in UTF-8,16,32 and Wide conversion func... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: '' Created 11 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « base/string_util_icu.cc ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
OLD | NEW
1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include <math.h> 5 #include <math.h>
6 #include <stdarg.h> 6 #include <stdarg.h>
7 7
8 #include <limits> 8 #include <limits>
9 #include <sstream> 9 #include <sstream>
10 10
(...skipping 291 matching lines...) Expand 10 before | Expand all | Expand 10 after
302 } 302 }
303 303
304 TEST(StringUtilTest, ConvertUTF8ToWide) { 304 TEST(StringUtilTest, ConvertUTF8ToWide) {
305 struct UTF8ToWideCase { 305 struct UTF8ToWideCase {
306 const char* utf8; 306 const char* utf8;
307 const wchar_t* wide; 307 const wchar_t* wide;
308 bool success; 308 bool success;
309 } convert_cases[] = { 309 } convert_cases[] = {
310 // Regular UTF-8 input. 310 // Regular UTF-8 input.
311 {"\xe4\xbd\xa0\xe5\xa5\xbd", L"\x4f60\x597d", true}, 311 {"\xe4\xbd\xa0\xe5\xa5\xbd", L"\x4f60\x597d", true},
312 // Invalid Unicode code point. 312 // Non-character is passed through.
313 {"\xef\xbf\xbfHello", L"Hello", false}, 313 {"\xef\xbf\xbfHello", L"\xffffHello", true},
314 // Truncated UTF-8 sequence. 314 // Truncated UTF-8 sequence.
315 {"\xe4\xa0\xe5\xa5\xbd", L"\x597d", false}, 315 {"\xe4\xa0\xe5\xa5\xbd", L"\x597d", false},
316 // Truncated off the end. 316 // Truncated off the end.
317 {"\xe5\xa5\xbd\xe4\xa0", L"\x597d", false}, 317 {"\xe5\xa5\xbd\xe4\xa0", L"\x597d", false},
318 // Non-shortest-form UTF-8. 318 // Non-shortest-form UTF-8.
319 {"\xf0\x84\xbd\xa0\xe5\xa5\xbd", L"\x597d", false}, 319 {"\xf0\x84\xbd\xa0\xe5\xa5\xbd", L"\x597d", false},
320 // This UTF-8 character decodes to a UTF-16 surrogate, which is illegal. 320 // This UTF-8 character decodes to a UTF-16 surrogate, which is illegal.
321 {"\xed\xb0\x80", L"", false}, 321 {"\xed\xb0\x80", L"", false},
322 // Non-BMP character. The result will either be in UTF-16 or UTF-32. 322 // Non-BMP characters. The second is a non-character regarded as valid.
323 // The result will either be in UTF-16 or UTF-32.
323 #if defined(WCHAR_T_IS_UTF16) 324 #if defined(WCHAR_T_IS_UTF16)
324 {"A\xF0\x90\x8C\x80z", L"A\xd800\xdf00z", true}, 325 {"A\xF0\x90\x8C\x80z", L"A\xd800\xdf00z", true},
326 {"A\xF4\x8F\xBF\xBEz", L"A\xdbff\xdffez", true},
325 #elif defined(WCHAR_T_IS_UTF32) 327 #elif defined(WCHAR_T_IS_UTF32)
326 {"A\xF0\x90\x8C\x80z", L"A\x10300z", true}, 328 {"A\xF0\x90\x8C\x80z", L"A\x10300z", true},
329 {"A\xF4\x8F\xBF\xBEz", L"A\x10fffez", true},
327 #endif 330 #endif
328 }; 331 };
329 332
330 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(convert_cases); i++) { 333 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(convert_cases); i++) {
331 std::wstring converted; 334 std::wstring converted;
332 EXPECT_EQ(convert_cases[i].success, 335 EXPECT_EQ(convert_cases[i].success,
333 UTF8ToWide(convert_cases[i].utf8, 336 UTF8ToWide(convert_cases[i].utf8,
334 strlen(convert_cases[i].utf8), 337 strlen(convert_cases[i].utf8),
335 &converted)); 338 &converted));
336 std::wstring expected(convert_cases[i].wide); 339 std::wstring expected(convert_cases[i].wide);
(...skipping 23 matching lines...) Expand all
360 TEST(StringUtilTest, ConvertUTF16ToUTF8) { 363 TEST(StringUtilTest, ConvertUTF16ToUTF8) {
361 struct UTF16ToUTF8Case { 364 struct UTF16ToUTF8Case {
362 const wchar_t* utf16; 365 const wchar_t* utf16;
363 const char* utf8; 366 const char* utf8;
364 bool success; 367 bool success;
365 } convert_cases[] = { 368 } convert_cases[] = {
366 // Regular UTF-16 input. 369 // Regular UTF-16 input.
367 {L"\x4f60\x597d", "\xe4\xbd\xa0\xe5\xa5\xbd", true}, 370 {L"\x4f60\x597d", "\xe4\xbd\xa0\xe5\xa5\xbd", true},
368 // Test a non-BMP character. 371 // Test a non-BMP character.
369 {L"\xd800\xdf00", "\xF0\x90\x8C\x80", true}, 372 {L"\xd800\xdf00", "\xF0\x90\x8C\x80", true},
370 // Invalid Unicode code point. 373 // Non-characters are passed through.
371 {L"\xffffHello", "Hello", false}, 374 {L"\xffffHello", "\xEF\xBF\xBFHello", true},
375 {L"\xdbff\xdffeHello", "\xF4\x8F\xBF\xBEHello", true},
372 // The first character is a truncated UTF-16 character. 376 // The first character is a truncated UTF-16 character.
373 {L"\xd800\x597d", "\xe5\xa5\xbd", false}, 377 {L"\xd800\x597d", "\xe5\xa5\xbd", false},
374 // Truncated at the end. 378 // Truncated at the end.
375 {L"\x597d\xd800", "\xe5\xa5\xbd", false}, 379 {L"\x597d\xd800", "\xe5\xa5\xbd", false},
376 }; 380 };
377 381
378 for (int i = 0; i < arraysize(convert_cases); i++) { 382 for (int i = 0; i < arraysize(convert_cases); i++) {
379 std::string converted; 383 std::string converted;
380 EXPECT_EQ(convert_cases[i].success, 384 EXPECT_EQ(convert_cases[i].success,
381 WideToUTF8(convert_cases[i].utf16, 385 WideToUTF8(convert_cases[i].utf16,
382 wcslen(convert_cases[i].utf16), 386 wcslen(convert_cases[i].utf16),
383 &converted)); 387 &converted));
384 std::string expected(convert_cases[i].utf8); 388 std::string expected(convert_cases[i].utf8);
385 EXPECT_EQ(expected, converted); 389 EXPECT_EQ(expected, converted);
386 } 390 }
387 } 391 }
388 392
389 #elif defined(WCHAR_T_IS_UTF32) 393 #elif defined(WCHAR_T_IS_UTF32)
390 // This test is only valid when wchar_t == UTF-32. 394 // This test is only valid when wchar_t == UTF-32.
391 TEST(StringUtilTest, ConvertUTF32ToUTF8) { 395 TEST(StringUtilTest, ConvertUTF32ToUTF8) {
392 struct UTF8ToWideCase { 396 struct WideToUTF8Case {
393 const wchar_t* utf32; 397 const wchar_t* utf32;
394 const char* utf8; 398 const char* utf8;
395 bool success; 399 bool success;
396 } convert_cases[] = { 400 } convert_cases[] = {
397 // Regular 16-bit input. 401 // Regular 16-bit input.
398 {L"\x4f60\x597d", "\xe4\xbd\xa0\xe5\xa5\xbd", true}, 402 {L"\x4f60\x597d", "\xe4\xbd\xa0\xe5\xa5\xbd", true},
399 // Test a non-BMP character. 403 // Test a non-BMP character.
400 {L"A\x10300z", "A\xF0\x90\x8C\x80z", true}, 404 {L"A\x10300z", "A\xF0\x90\x8C\x80z", true},
405 // Non-characters are passed through.
406 {L"\xffffHello", "\xEF\xBF\xBFHello", true},
407 {L"\x10fffeHello", "\xF4\x8F\xBF\xBEHello", true},
401 // Invalid Unicode code points. 408 // Invalid Unicode code points.
402 {L"\xffffHello", "Hello", false},
403 {L"\xfffffffHello", "Hello", false}, 409 {L"\xfffffffHello", "Hello", false},
404 // The first character is a truncated UTF-16 character. 410 // The first character is a truncated UTF-16 character.
405 {L"\xd800\x597d", "\xe5\xa5\xbd", false}, 411 {L"\xd800\x597d", "\xe5\xa5\xbd", false},
412 {L"\xdc01Hello", "Hello", false},
406 }; 413 };
407 414
408 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(convert_cases); i++) { 415 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(convert_cases); i++) {
409 std::string converted; 416 std::string converted;
410 EXPECT_EQ(convert_cases[i].success, 417 EXPECT_EQ(convert_cases[i].success,
411 WideToUTF8(convert_cases[i].utf32, 418 WideToUTF8(convert_cases[i].utf32,
412 wcslen(convert_cases[i].utf32), 419 wcslen(convert_cases[i].utf32),
413 &converted)); 420 &converted));
414 std::string expected(convert_cases[i].utf8); 421 std::string expected(convert_cases[i].utf8);
415 EXPECT_EQ(expected, converted); 422 EXPECT_EQ(expected, converted);
(...skipping 1261 matching lines...) Expand 10 before | Expand all | Expand 10 after
1677 } 1684 }
1678 } 1685 }
1679 1686
1680 TEST(StringUtilTest, HexEncode) { 1687 TEST(StringUtilTest, HexEncode) {
1681 std::string hex(HexEncode(NULL, 0)); 1688 std::string hex(HexEncode(NULL, 0));
1682 EXPECT_EQ(hex.length(), 0U); 1689 EXPECT_EQ(hex.length(), 0U);
1683 unsigned char bytes[] = {0x01, 0xff, 0x02, 0xfe, 0x03, 0x80, 0x81}; 1690 unsigned char bytes[] = {0x01, 0xff, 0x02, 0xfe, 0x03, 0x80, 0x81};
1684 hex = HexEncode(bytes, sizeof(bytes)); 1691 hex = HexEncode(bytes, sizeof(bytes));
1685 EXPECT_EQ(hex.compare("01FF02FE038081"), 0); 1692 EXPECT_EQ(hex.compare("01FF02FE038081"), 0);
1686 } 1693 }
OLD | NEW
« no previous file with comments | « base/string_util_icu.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698