OLD | NEW |
1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include <math.h> | 5 #include <math.h> |
6 #include <stdarg.h> | 6 #include <stdarg.h> |
7 | 7 |
8 #include <limits> | 8 #include <limits> |
9 #include <sstream> | 9 #include <sstream> |
10 | 10 |
11 #include "base/basictypes.h" | 11 #include "base/basictypes.h" |
| 12 #include "base/format_macros.h" |
12 #include "base/i18n/icu_string_conversions.h" | 13 #include "base/i18n/icu_string_conversions.h" |
13 #include "base/logging.h" | 14 #include "base/logging.h" |
| 15 #include "base/stringprintf.h" |
14 #include "base/string_piece.h" | 16 #include "base/string_piece.h" |
15 #include "base/utf_string_conversions.h" | 17 #include "base/utf_string_conversions.h" |
16 #include "testing/gtest/include/gtest/gtest.h" | 18 #include "testing/gtest/include/gtest/gtest.h" |
17 | 19 |
18 namespace base { | 20 namespace base { |
19 | 21 |
20 namespace { | 22 namespace { |
21 | 23 |
22 // Given a null-terminated string of wchar_t with each wchar_t representing | 24 // Given a null-terminated string of wchar_t with each wchar_t representing |
23 // a UTF-16 code unit, returns a string16 made up of wchar_t's in the input. | 25 // a UTF-16 code unit, returns a string16 made up of wchar_t's in the input. |
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
65 // ????? (Mathematical Alphanumeric Symbols (U+011d40 - U+011d44 : A,B,C,D,E) | 67 // ????? (Mathematical Alphanumeric Symbols (U+011d40 - U+011d44 : A,B,C,D,E) |
66 L"\x11d40\x11d41\x11d42\x11d43\x11d44", | 68 L"\x11d40\x11d41\x11d42\x11d43\x11d44", |
67 #endif | 69 #endif |
68 }; | 70 }; |
69 | 71 |
70 } // namespace | 72 } // namespace |
71 | 73 |
72 TEST(ICUStringConversionsTest, ConvertCodepageUTF8) { | 74 TEST(ICUStringConversionsTest, ConvertCodepageUTF8) { |
73 // Make sure WideToCodepage works like WideToUTF8. | 75 // Make sure WideToCodepage works like WideToUTF8. |
74 for (size_t i = 0; i < arraysize(kConvertRoundtripCases); ++i) { | 76 for (size_t i = 0; i < arraysize(kConvertRoundtripCases); ++i) { |
| 77 SCOPED_TRACE(base::StringPrintf("Test[%" PRIuS "]: %ls", |
| 78 i, kConvertRoundtripCases[i])); |
| 79 |
75 std::string expected(WideToUTF8(kConvertRoundtripCases[i])); | 80 std::string expected(WideToUTF8(kConvertRoundtripCases[i])); |
76 std::string utf8; | 81 std::string utf8; |
77 EXPECT_TRUE(WideToCodepage(kConvertRoundtripCases[i], kCodepageUTF8, | 82 EXPECT_TRUE(WideToCodepage(kConvertRoundtripCases[i], kCodepageUTF8, |
78 OnStringConversionError::SKIP, &utf8)); | 83 OnStringConversionError::SKIP, &utf8)); |
79 EXPECT_EQ(expected, utf8); | 84 EXPECT_EQ(expected, utf8); |
80 } | 85 } |
81 } | 86 } |
82 | 87 |
83 // kConverterCodepageCases is not comprehensive. There are a number of cases | 88 // kConverterCodepageCases is not comprehensive. There are a number of cases |
84 // to add if we really want to have a comprehensive coverage of various | 89 // to add if we really want to have a comprehensive coverage of various |
(...skipping 145 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
230 "\xCA\xC7\xD1\xCA\xB4\xD5" "\xA4\xC3\xD1\xBA", | 235 "\xCA\xC7\xD1\xCA\xB4\xD5" "\xA4\xC3\xD1\xBA", |
231 OnStringConversionError::FAIL, | 236 OnStringConversionError::FAIL, |
232 true, | 237 true, |
233 L"\x0E2A\x0E27\x0E31\x0E2A\x0E14\x0E35" | 238 L"\x0E2A\x0E27\x0E31\x0E2A\x0E14\x0E35" |
234 L"\x0E04\x0E23\x0e31\x0E1A", | 239 L"\x0E04\x0E23\x0e31\x0E1A", |
235 NULL}, | 240 NULL}, |
236 }; | 241 }; |
237 | 242 |
238 TEST(ICUStringConversionsTest, ConvertBetweenCodepageAndWide) { | 243 TEST(ICUStringConversionsTest, ConvertBetweenCodepageAndWide) { |
239 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kConvertCodepageCases); ++i) { | 244 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kConvertCodepageCases); ++i) { |
| 245 SCOPED_TRACE(base::StringPrintf( |
| 246 "Test[%" PRIuS "]: <encoded: %s> <codepage: %s>", i, |
| 247 kConvertCodepageCases[i].encoded, |
| 248 kConvertCodepageCases[i].codepage_name)); |
| 249 |
240 std::wstring wide; | 250 std::wstring wide; |
241 bool success = CodepageToWide(kConvertCodepageCases[i].encoded, | 251 bool success = CodepageToWide(kConvertCodepageCases[i].encoded, |
242 kConvertCodepageCases[i].codepage_name, | 252 kConvertCodepageCases[i].codepage_name, |
243 kConvertCodepageCases[i].on_error, | 253 kConvertCodepageCases[i].on_error, |
244 &wide); | 254 &wide); |
245 EXPECT_EQ(kConvertCodepageCases[i].success, success); | 255 EXPECT_EQ(kConvertCodepageCases[i].success, success); |
246 EXPECT_EQ(kConvertCodepageCases[i].wide, wide); | 256 EXPECT_EQ(kConvertCodepageCases[i].wide, wide); |
247 | 257 |
248 // When decoding was successful and nothing was skipped, we also check the | 258 // When decoding was successful and nothing was skipped, we also check the |
249 // reverse conversion. Not all conversions are round-trippable, but | 259 // reverse conversion. Not all conversions are round-trippable, but |
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
292 OnStringConversionError::SKIP, &encoded)); | 302 OnStringConversionError::SKIP, &encoded)); |
293 EXPECT_STREQ("az", encoded.c_str()); | 303 EXPECT_STREQ("az", encoded.c_str()); |
294 | 304 |
295 // Invalid codepages should fail. | 305 // Invalid codepages should fail. |
296 EXPECT_FALSE(WideToCodepage(L"Hello, world", "awesome-8571-2", | 306 EXPECT_FALSE(WideToCodepage(L"Hello, world", "awesome-8571-2", |
297 OnStringConversionError::SKIP, &encoded)); | 307 OnStringConversionError::SKIP, &encoded)); |
298 } | 308 } |
299 | 309 |
300 TEST(ICUStringConversionsTest, ConvertBetweenCodepageAndUTF16) { | 310 TEST(ICUStringConversionsTest, ConvertBetweenCodepageAndUTF16) { |
301 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kConvertCodepageCases); ++i) { | 311 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kConvertCodepageCases); ++i) { |
| 312 SCOPED_TRACE(base::StringPrintf( |
| 313 "Test[%" PRIuS "]: <encoded: %s> <codepage: %s>", i, |
| 314 kConvertCodepageCases[i].encoded, |
| 315 kConvertCodepageCases[i].codepage_name)); |
| 316 |
302 string16 utf16; | 317 string16 utf16; |
303 bool success = CodepageToUTF16(kConvertCodepageCases[i].encoded, | 318 bool success = CodepageToUTF16(kConvertCodepageCases[i].encoded, |
304 kConvertCodepageCases[i].codepage_name, | 319 kConvertCodepageCases[i].codepage_name, |
305 kConvertCodepageCases[i].on_error, | 320 kConvertCodepageCases[i].on_error, |
306 &utf16); | 321 &utf16); |
307 string16 utf16_expected; | 322 string16 utf16_expected; |
308 if (kConvertCodepageCases[i].u16_wide == NULL) | 323 if (kConvertCodepageCases[i].u16_wide == NULL) |
309 utf16_expected = BuildString16(kConvertCodepageCases[i].wide); | 324 utf16_expected = BuildString16(kConvertCodepageCases[i].wide); |
310 else | 325 else |
311 utf16_expected = BuildString16(kConvertCodepageCases[i].u16_wide); | 326 utf16_expected = BuildString16(kConvertCodepageCases[i].u16_wide); |
(...skipping 28 matching lines...) Expand all Loading... |
340 {"foo-a\xcc\x88.html", "utf-8", true, "foo-\xc3\xa4.html"}, | 355 {"foo-a\xcc\x88.html", "utf-8", true, "foo-\xc3\xa4.html"}, |
341 {"\x95\x32\x82\x36\xD2\xBB", "gb18030", true, "\xF0\xA0\x80\x80\xE4\xB8\x80"}, | 356 {"\x95\x32\x82\x36\xD2\xBB", "gb18030", true, "\xF0\xA0\x80\x80\xE4\xB8\x80"}, |
342 {"\xA7\x41\xA6\x6E", "big5", true, "\xE4\xBD\xA0\xE5\xA5\xBD"}, | 357 {"\xA7\x41\xA6\x6E", "big5", true, "\xE4\xBD\xA0\xE5\xA5\xBD"}, |
343 // Windows-1258 does have a combining character at xD2 (which is U+0309). | 358 // Windows-1258 does have a combining character at xD2 (which is U+0309). |
344 // The sequence of (U+00E2, U+0309) is also encoded as U+1EA9. | 359 // The sequence of (U+00E2, U+0309) is also encoded as U+1EA9. |
345 {"foo\xE2\xD2", "windows-1258", true, "foo\xE1\xBA\xA9"}, | 360 {"foo\xE2\xD2", "windows-1258", true, "foo\xE1\xBA\xA9"}, |
346 }; | 361 }; |
347 TEST(ICUStringConversionsTest, ConvertToUtf8AndNormalize) { | 362 TEST(ICUStringConversionsTest, ConvertToUtf8AndNormalize) { |
348 std::string result; | 363 std::string result; |
349 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kConvertAndNormalizeCases); ++i) { | 364 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kConvertAndNormalizeCases); ++i) { |
| 365 SCOPED_TRACE(base::StringPrintf( |
| 366 "Test[%" PRIuS "]: <encoded: %s> <codepage: %s>", i, |
| 367 kConvertAndNormalizeCases[i].encoded, |
| 368 kConvertAndNormalizeCases[i].codepage_name)); |
| 369 |
350 bool success = ConvertToUtf8AndNormalize( | 370 bool success = ConvertToUtf8AndNormalize( |
351 kConvertAndNormalizeCases[i].encoded, | 371 kConvertAndNormalizeCases[i].encoded, |
352 kConvertAndNormalizeCases[i].codepage_name, &result); | 372 kConvertAndNormalizeCases[i].codepage_name, &result); |
353 EXPECT_EQ(kConvertAndNormalizeCases[i].expected_success, success); | 373 EXPECT_EQ(kConvertAndNormalizeCases[i].expected_success, success); |
354 EXPECT_EQ(kConvertAndNormalizeCases[i].expected_value, result); | 374 EXPECT_EQ(kConvertAndNormalizeCases[i].expected_value, result); |
355 } | 375 } |
356 } | 376 } |
357 | 377 |
358 } // namespace base | 378 } // namespace base |
OLD | NEW |