OLD | NEW |
1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include <math.h> | 5 #include <math.h> |
6 #include <stdarg.h> | 6 #include <stdarg.h> |
7 | 7 |
8 #include <limits> | 8 #include <limits> |
9 #include <sstream> | 9 #include <sstream> |
10 | 10 |
(...skipping 291 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
302 } | 302 } |
303 | 303 |
304 TEST(StringUtilTest, ConvertUTF8ToWide) { | 304 TEST(StringUtilTest, ConvertUTF8ToWide) { |
305 struct UTF8ToWideCase { | 305 struct UTF8ToWideCase { |
306 const char* utf8; | 306 const char* utf8; |
307 const wchar_t* wide; | 307 const wchar_t* wide; |
308 bool success; | 308 bool success; |
309 } convert_cases[] = { | 309 } convert_cases[] = { |
310 // Regular UTF-8 input. | 310 // Regular UTF-8 input. |
311 {"\xe4\xbd\xa0\xe5\xa5\xbd", L"\x4f60\x597d", true}, | 311 {"\xe4\xbd\xa0\xe5\xa5\xbd", L"\x4f60\x597d", true}, |
312 // Invalid Unicode code point. | 312 // Non-character is passed through. |
313 {"\xef\xbf\xbfHello", L"Hello", false}, | 313 {"\xef\xbf\xbfHello", L"\xffffHello", true}, |
314 // Truncated UTF-8 sequence. | 314 // Truncated UTF-8 sequence. |
315 {"\xe4\xa0\xe5\xa5\xbd", L"\x597d", false}, | 315 {"\xe4\xa0\xe5\xa5\xbd", L"\x597d", false}, |
316 // Truncated off the end. | 316 // Truncated off the end. |
317 {"\xe5\xa5\xbd\xe4\xa0", L"\x597d", false}, | 317 {"\xe5\xa5\xbd\xe4\xa0", L"\x597d", false}, |
318 // Non-shortest-form UTF-8. | 318 // Non-shortest-form UTF-8. |
319 {"\xf0\x84\xbd\xa0\xe5\xa5\xbd", L"\x597d", false}, | 319 {"\xf0\x84\xbd\xa0\xe5\xa5\xbd", L"\x597d", false}, |
320 // This UTF-8 character decodes to a UTF-16 surrogate, which is illegal. | 320 // This UTF-8 character decodes to a UTF-16 surrogate, which is illegal. |
321 {"\xed\xb0\x80", L"", false}, | 321 {"\xed\xb0\x80", L"", false}, |
322 // Non-BMP character. The result will either be in UTF-16 or UTF-32. | 322 // Non-BMP characters. The second is a non-character regarded as valid. |
| 323 // The result will either be in UTF-16 or UTF-32. |
323 #if defined(WCHAR_T_IS_UTF16) | 324 #if defined(WCHAR_T_IS_UTF16) |
324 {"A\xF0\x90\x8C\x80z", L"A\xd800\xdf00z", true}, | 325 {"A\xF0\x90\x8C\x80z", L"A\xd800\xdf00z", true}, |
| 326 {"A\xF4\x8F\xBF\xBEz", L"A\xdbff\xdffez", true}, |
325 #elif defined(WCHAR_T_IS_UTF32) | 327 #elif defined(WCHAR_T_IS_UTF32) |
326 {"A\xF0\x90\x8C\x80z", L"A\x10300z", true}, | 328 {"A\xF0\x90\x8C\x80z", L"A\x10300z", true}, |
| 329 {"A\xF4\x8F\xBF\xBEz", L"A\x10fffez", true}, |
327 #endif | 330 #endif |
328 }; | 331 }; |
329 | 332 |
330 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(convert_cases); i++) { | 333 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(convert_cases); i++) { |
331 std::wstring converted; | 334 std::wstring converted; |
332 EXPECT_EQ(convert_cases[i].success, | 335 EXPECT_EQ(convert_cases[i].success, |
333 UTF8ToWide(convert_cases[i].utf8, | 336 UTF8ToWide(convert_cases[i].utf8, |
334 strlen(convert_cases[i].utf8), | 337 strlen(convert_cases[i].utf8), |
335 &converted)); | 338 &converted)); |
336 std::wstring expected(convert_cases[i].wide); | 339 std::wstring expected(convert_cases[i].wide); |
(...skipping 23 matching lines...) Expand all Loading... |
360 TEST(StringUtilTest, ConvertUTF16ToUTF8) { | 363 TEST(StringUtilTest, ConvertUTF16ToUTF8) { |
361 struct UTF16ToUTF8Case { | 364 struct UTF16ToUTF8Case { |
362 const wchar_t* utf16; | 365 const wchar_t* utf16; |
363 const char* utf8; | 366 const char* utf8; |
364 bool success; | 367 bool success; |
365 } convert_cases[] = { | 368 } convert_cases[] = { |
366 // Regular UTF-16 input. | 369 // Regular UTF-16 input. |
367 {L"\x4f60\x597d", "\xe4\xbd\xa0\xe5\xa5\xbd", true}, | 370 {L"\x4f60\x597d", "\xe4\xbd\xa0\xe5\xa5\xbd", true}, |
368 // Test a non-BMP character. | 371 // Test a non-BMP character. |
369 {L"\xd800\xdf00", "\xF0\x90\x8C\x80", true}, | 372 {L"\xd800\xdf00", "\xF0\x90\x8C\x80", true}, |
370 // Invalid Unicode code point. | 373 // Non-characters are passed through. |
371 {L"\xffffHello", "Hello", false}, | 374 {L"\xffffHello", "\xEF\xBF\xBFHello", true}, |
| 375 {L"\xdbff\xdffeHello", "\xF4\x8F\xBF\xBEHello", true}, |
372 // The first character is a truncated UTF-16 character. | 376 // The first character is a truncated UTF-16 character. |
373 {L"\xd800\x597d", "\xe5\xa5\xbd", false}, | 377 {L"\xd800\x597d", "\xe5\xa5\xbd", false}, |
374 // Truncated at the end. | 378 // Truncated at the end. |
375 {L"\x597d\xd800", "\xe5\xa5\xbd", false}, | 379 {L"\x597d\xd800", "\xe5\xa5\xbd", false}, |
376 }; | 380 }; |
377 | 381 |
378 for (int i = 0; i < arraysize(convert_cases); i++) { | 382 for (int i = 0; i < arraysize(convert_cases); i++) { |
379 std::string converted; | 383 std::string converted; |
380 EXPECT_EQ(convert_cases[i].success, | 384 EXPECT_EQ(convert_cases[i].success, |
381 WideToUTF8(convert_cases[i].utf16, | 385 WideToUTF8(convert_cases[i].utf16, |
382 wcslen(convert_cases[i].utf16), | 386 wcslen(convert_cases[i].utf16), |
383 &converted)); | 387 &converted)); |
384 std::string expected(convert_cases[i].utf8); | 388 std::string expected(convert_cases[i].utf8); |
385 EXPECT_EQ(expected, converted); | 389 EXPECT_EQ(expected, converted); |
386 } | 390 } |
387 } | 391 } |
388 | 392 |
389 #elif defined(WCHAR_T_IS_UTF32) | 393 #elif defined(WCHAR_T_IS_UTF32) |
390 // This test is only valid when wchar_t == UTF-32. | 394 // This test is only valid when wchar_t == UTF-32. |
391 TEST(StringUtilTest, ConvertUTF32ToUTF8) { | 395 TEST(StringUtilTest, ConvertUTF32ToUTF8) { |
392 struct UTF8ToWideCase { | 396 struct WideToUTF8Case { |
393 const wchar_t* utf32; | 397 const wchar_t* utf32; |
394 const char* utf8; | 398 const char* utf8; |
395 bool success; | 399 bool success; |
396 } convert_cases[] = { | 400 } convert_cases[] = { |
397 // Regular 16-bit input. | 401 // Regular 16-bit input. |
398 {L"\x4f60\x597d", "\xe4\xbd\xa0\xe5\xa5\xbd", true}, | 402 {L"\x4f60\x597d", "\xe4\xbd\xa0\xe5\xa5\xbd", true}, |
399 // Test a non-BMP character. | 403 // Test a non-BMP character. |
400 {L"A\x10300z", "A\xF0\x90\x8C\x80z", true}, | 404 {L"A\x10300z", "A\xF0\x90\x8C\x80z", true}, |
| 405 // Non-characters are passed through. |
| 406 {L"\xffffHello", "\xEF\xBF\xBFHello", true}, |
| 407 {L"\x10fffeHello", "\xF4\x8F\xBF\xBEHello", true}, |
401 // Invalid Unicode code points. | 408 // Invalid Unicode code points. |
402 {L"\xffffHello", "Hello", false}, | |
403 {L"\xfffffffHello", "Hello", false}, | 409 {L"\xfffffffHello", "Hello", false}, |
404 // The first character is a truncated UTF-16 character. | 410 // The first character is a truncated UTF-16 character. |
405 {L"\xd800\x597d", "\xe5\xa5\xbd", false}, | 411 {L"\xd800\x597d", "\xe5\xa5\xbd", false}, |
| 412 {L"\xdc01Hello", "Hello", false}, |
406 }; | 413 }; |
407 | 414 |
408 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(convert_cases); i++) { | 415 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(convert_cases); i++) { |
409 std::string converted; | 416 std::string converted; |
410 EXPECT_EQ(convert_cases[i].success, | 417 EXPECT_EQ(convert_cases[i].success, |
411 WideToUTF8(convert_cases[i].utf32, | 418 WideToUTF8(convert_cases[i].utf32, |
412 wcslen(convert_cases[i].utf32), | 419 wcslen(convert_cases[i].utf32), |
413 &converted)); | 420 &converted)); |
414 std::string expected(convert_cases[i].utf8); | 421 std::string expected(convert_cases[i].utf8); |
415 EXPECT_EQ(expected, converted); | 422 EXPECT_EQ(expected, converted); |
(...skipping 1261 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1677 } | 1684 } |
1678 } | 1685 } |
1679 | 1686 |
1680 TEST(StringUtilTest, HexEncode) { | 1687 TEST(StringUtilTest, HexEncode) { |
1681 std::string hex(HexEncode(NULL, 0)); | 1688 std::string hex(HexEncode(NULL, 0)); |
1682 EXPECT_EQ(hex.length(), 0U); | 1689 EXPECT_EQ(hex.length(), 0U); |
1683 unsigned char bytes[] = {0x01, 0xff, 0x02, 0xfe, 0x03, 0x80, 0x81}; | 1690 unsigned char bytes[] = {0x01, 0xff, 0x02, 0xfe, 0x03, 0x80, 0x81}; |
1684 hex = HexEncode(bytes, sizeof(bytes)); | 1691 hex = HexEncode(bytes, sizeof(bytes)); |
1685 EXPECT_EQ(hex.compare("01FF02FE038081"), 0); | 1692 EXPECT_EQ(hex.compare("01FF02FE038081"), 0); |
1686 } | 1693 } |
OLD | NEW |