base/string_util_unittest.cc - Issue 147038: Pass through non-character codepoints in UTF-8,16,32 and Wide conversion functions

Side by Side Diff: base/string_util_unittest.cc

Issue 147038: Pass through non-character codepoints in UTF-8,16,32 and Wide conversion functions (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/

Patch Set: '' Created 11 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include <math.h>	5 #include <math.h>

6 #include <stdarg.h>	6 #include <stdarg.h>

7	7

8 #include <limits>	8 #include <limits>

9 #include <sstream>	9 #include <sstream>

10	10

(...skipping 291 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
302 }	302 }

303	303

304 TEST(StringUtilTest, ConvertUTF8ToWide) {	304 TEST(StringUtilTest, ConvertUTF8ToWide) {

305 struct UTF8ToWideCase {	305 struct UTF8ToWideCase {

306 const char* utf8;	306 const char* utf8;

307 const wchar_t* wide;	307 const wchar_t* wide;

308 bool success;	308 bool success;

309 } convert_cases[] = {	309 } convert_cases[] = {

310 // Regular UTF-8 input.	310 // Regular UTF-8 input.

311 {"\xe4\xbd\xa0\xe5\xa5\xbd", L"\x4f60\x597d", true},	311 {"\xe4\xbd\xa0\xe5\xa5\xbd", L"\x4f60\x597d", true},

312 // Invalid Unicode code point.	312 // Non-character is passed through.

313 {"\xef\xbf\xbfHello", L"Hello", false},	313 {"\xef\xbf\xbfHello", L"\xffffHello", true},

314 // Truncated UTF-8 sequence.	314 // Truncated UTF-8 sequence.

315 {"\xe4\xa0\xe5\xa5\xbd", L"\x597d", false},	315 {"\xe4\xa0\xe5\xa5\xbd", L"\x597d", false},

316 // Truncated off the end.	316 // Truncated off the end.

317 {"\xe5\xa5\xbd\xe4\xa0", L"\x597d", false},	317 {"\xe5\xa5\xbd\xe4\xa0", L"\x597d", false},

318 // Non-shortest-form UTF-8.	318 // Non-shortest-form UTF-8.

319 {"\xf0\x84\xbd\xa0\xe5\xa5\xbd", L"\x597d", false},	319 {"\xf0\x84\xbd\xa0\xe5\xa5\xbd", L"\x597d", false},

320 // This UTF-8 character decodes to a UTF-16 surrogate, which is illegal.	320 // This UTF-8 character decodes to a UTF-16 surrogate, which is illegal.

321 {"\xed\xb0\x80", L"", false},	321 {"\xed\xb0\x80", L"", false},

322 // Non-BMP character. The result will either be in UTF-16 or UTF-32.	322 // Non-BMP characters. The second is a non-character regarded as valid.

	323 // The result will either be in UTF-16 or UTF-32.

323 #if defined(WCHAR_T_IS_UTF16)	324 #if defined(WCHAR_T_IS_UTF16)

324 {"A\xF0\x90\x8C\x80z", L"A\xd800\xdf00z", true},	325 {"A\xF0\x90\x8C\x80z", L"A\xd800\xdf00z", true},

	326 {"A\xF4\x8F\xBF\xBEz", L"A\xdbff\xdffez", true},

325 #elif defined(WCHAR_T_IS_UTF32)	327 #elif defined(WCHAR_T_IS_UTF32)

326 {"A\xF0\x90\x8C\x80z", L"A\x10300z", true},	328 {"A\xF0\x90\x8C\x80z", L"A\x10300z", true},

	329 {"A\xF4\x8F\xBF\xBEz", L"A\x10fffez", true},

327 #endif	330 #endif

328 };	331 };

329	332

330 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(convert_cases); i++) {	333 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(convert_cases); i++) {

331 std::wstring converted;	334 std::wstring converted;

332 EXPECT_EQ(convert_cases[i].success,	335 EXPECT_EQ(convert_cases[i].success,

333 UTF8ToWide(convert_cases[i].utf8,	336 UTF8ToWide(convert_cases[i].utf8,

334 strlen(convert_cases[i].utf8),	337 strlen(convert_cases[i].utf8),

335 &converted));	338 &converted));

336 std::wstring expected(convert_cases[i].wide);	339 std::wstring expected(convert_cases[i].wide);

(...skipping 23 matching lines...) Expand all Loading...
360 TEST(StringUtilTest, ConvertUTF16ToUTF8) {	363 TEST(StringUtilTest, ConvertUTF16ToUTF8) {

361 struct UTF16ToUTF8Case {	364 struct UTF16ToUTF8Case {

362 const wchar_t* utf16;	365 const wchar_t* utf16;

363 const char* utf8;	366 const char* utf8;

364 bool success;	367 bool success;

365 } convert_cases[] = {	368 } convert_cases[] = {

366 // Regular UTF-16 input.	369 // Regular UTF-16 input.

367 {L"\x4f60\x597d", "\xe4\xbd\xa0\xe5\xa5\xbd", true},	370 {L"\x4f60\x597d", "\xe4\xbd\xa0\xe5\xa5\xbd", true},

368 // Test a non-BMP character.	371 // Test a non-BMP character.

369 {L"\xd800\xdf00", "\xF0\x90\x8C\x80", true},	372 {L"\xd800\xdf00", "\xF0\x90\x8C\x80", true},

370 // Invalid Unicode code point.	373 // Non-characters are passed through.

371 {L"\xffffHello", "Hello", false},	374 {L"\xffffHello", "\xEF\xBF\xBFHello", true},

	375 {L"\xdbff\xdffeHello", "\xF4\x8F\xBF\xBEHello", true},

372 // The first character is a truncated UTF-16 character.	376 // The first character is a truncated UTF-16 character.

373 {L"\xd800\x597d", "\xe5\xa5\xbd", false},	377 {L"\xd800\x597d", "\xe5\xa5\xbd", false},

374 // Truncated at the end.	378 // Truncated at the end.

375 {L"\x597d\xd800", "\xe5\xa5\xbd", false},	379 {L"\x597d\xd800", "\xe5\xa5\xbd", false},

376 };	380 };

377	381

378 for (int i = 0; i < arraysize(convert_cases); i++) {	382 for (int i = 0; i < arraysize(convert_cases); i++) {

379 std::string converted;	383 std::string converted;

380 EXPECT_EQ(convert_cases[i].success,	384 EXPECT_EQ(convert_cases[i].success,

381 WideToUTF8(convert_cases[i].utf16,	385 WideToUTF8(convert_cases[i].utf16,

382 wcslen(convert_cases[i].utf16),	386 wcslen(convert_cases[i].utf16),

383 &converted));	387 &converted));

384 std::string expected(convert_cases[i].utf8);	388 std::string expected(convert_cases[i].utf8);

385 EXPECT_EQ(expected, converted);	389 EXPECT_EQ(expected, converted);

386 }	390 }

387 }	391 }

388	392

389 #elif defined(WCHAR_T_IS_UTF32)	393 #elif defined(WCHAR_T_IS_UTF32)

390 // This test is only valid when wchar_t == UTF-32.	394 // This test is only valid when wchar_t == UTF-32.

391 TEST(StringUtilTest, ConvertUTF32ToUTF8) {	395 TEST(StringUtilTest, ConvertUTF32ToUTF8) {

392 struct UTF8ToWideCase {	396 struct WideToUTF8Case {

393 const wchar_t* utf32;	397 const wchar_t* utf32;

394 const char* utf8;	398 const char* utf8;

395 bool success;	399 bool success;

396 } convert_cases[] = {	400 } convert_cases[] = {

397 // Regular 16-bit input.	401 // Regular 16-bit input.

398 {L"\x4f60\x597d", "\xe4\xbd\xa0\xe5\xa5\xbd", true},	402 {L"\x4f60\x597d", "\xe4\xbd\xa0\xe5\xa5\xbd", true},

399 // Test a non-BMP character.	403 // Test a non-BMP character.

400 {L"A\x10300z", "A\xF0\x90\x8C\x80z", true},	404 {L"A\x10300z", "A\xF0\x90\x8C\x80z", true},

	405 // Non-characters are passed through.

	406 {L"\xffffHello", "\xEF\xBF\xBFHello", true},

	407 {L"\x10fffeHello", "\xF4\x8F\xBF\xBEHello", true},

401 // Invalid Unicode code points.	408 // Invalid Unicode code points.

402 {L"\xffffHello", "Hello", false},

403 {L"\xfffffffHello", "Hello", false},	409 {L"\xfffffffHello", "Hello", false},

404 // The first character is a truncated UTF-16 character.	410 // The first character is a truncated UTF-16 character.

405 {L"\xd800\x597d", "\xe5\xa5\xbd", false},	411 {L"\xd800\x597d", "\xe5\xa5\xbd", false},

	412 {L"\xdc01Hello", "Hello", false},

406 };	413 };

407	414

408 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(convert_cases); i++) {	415 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(convert_cases); i++) {

409 std::string converted;	416 std::string converted;

410 EXPECT_EQ(convert_cases[i].success,	417 EXPECT_EQ(convert_cases[i].success,

411 WideToUTF8(convert_cases[i].utf32,	418 WideToUTF8(convert_cases[i].utf32,

412 wcslen(convert_cases[i].utf32),	419 wcslen(convert_cases[i].utf32),

413 &converted));	420 &converted));

414 std::string expected(convert_cases[i].utf8);	421 std::string expected(convert_cases[i].utf8);

415 EXPECT_EQ(expected, converted);	422 EXPECT_EQ(expected, converted);

(...skipping 1261 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
1677 }	1684 }

1678 }	1685 }

1679	1686

1680 TEST(StringUtilTest, HexEncode) {	1687 TEST(StringUtilTest, HexEncode) {

1681 std::string hex(HexEncode(NULL, 0));	1688 std::string hex(HexEncode(NULL, 0));

1682 EXPECT_EQ(hex.length(), 0U);	1689 EXPECT_EQ(hex.length(), 0U);

1683 unsigned char bytes[] = {0x01, 0xff, 0x02, 0xfe, 0x03, 0x80, 0x81};	1690 unsigned char bytes[] = {0x01, 0xff, 0x02, 0xfe, 0x03, 0x80, 0x81};

1684 hex = HexEncode(bytes, sizeof(bytes));	1691 hex = HexEncode(bytes, sizeof(bytes));

1685 EXPECT_EQ(hex.compare("01FF02FE038081"), 0);	1692 EXPECT_EQ(hex.compare("01FF02FE038081"), 0);

1686 }	1693 }

OLD	NEW

« no previous file with comments | « base/string_util_icu.cc ('k') | no next file » | no next file with comments »