OLD | NEW |
1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include <math.h> | 5 #include <math.h> |
6 #include <stdarg.h> | 6 #include <stdarg.h> |
7 | 7 |
8 #include <limits> | 8 #include <limits> |
9 #include <sstream> | 9 #include <sstream> |
10 | 10 |
11 #include "base/basictypes.h" | 11 #include "base/basictypes.h" |
12 #include "base/i18n/icu_string_conversions.h" | 12 #include "base/i18n/icu_string_conversions.h" |
13 #include "base/logging.h" | 13 #include "base/logging.h" |
| 14 #include "base/string_piece.h" |
14 #include "base/utf_string_conversions.h" | 15 #include "base/utf_string_conversions.h" |
15 #include "testing/gtest/include/gtest/gtest.h" | 16 #include "testing/gtest/include/gtest/gtest.h" |
16 | 17 |
17 namespace base { | 18 namespace base { |
18 | 19 |
19 namespace { | 20 namespace { |
20 | 21 |
21 // Given a null-terminated string of wchar_t with each wchar_t representing | 22 // Given a null-terminated string of wchar_t with each wchar_t representing |
22 // a UTF-16 code unit, returns a string16 made up of wchar_t's in the input. | 23 // a UTF-16 code unit, returns a string16 made up of wchar_t's in the input. |
23 // Each wchar_t should be <= 0xFFFF and a non-BMP character (> U+FFFF) | 24 // Each wchar_t should be <= 0xFFFF and a non-BMP character (> U+FFFF) |
(...skipping 294 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
318 kConvertCodepageCases[i].on_error == OnStringConversionError::FAIL) { | 319 kConvertCodepageCases[i].on_error == OnStringConversionError::FAIL) { |
319 std::string encoded; | 320 std::string encoded; |
320 success = UTF16ToCodepage(utf16, kConvertCodepageCases[i].codepage_name, | 321 success = UTF16ToCodepage(utf16, kConvertCodepageCases[i].codepage_name, |
321 kConvertCodepageCases[i].on_error, &encoded); | 322 kConvertCodepageCases[i].on_error, &encoded); |
322 EXPECT_EQ(kConvertCodepageCases[i].success, success); | 323 EXPECT_EQ(kConvertCodepageCases[i].success, success); |
323 EXPECT_EQ(kConvertCodepageCases[i].encoded, encoded); | 324 EXPECT_EQ(kConvertCodepageCases[i].encoded, encoded); |
324 } | 325 } |
325 } | 326 } |
326 } | 327 } |
327 | 328 |
// Test vectors for the ConvertToUtf8AndNormalize test below. Each entry holds
// the input bytes, the codepage name handed to the converter, whether the
// conversion is expected to succeed, and the expected UTF-8 output (empty for
// the entries that are expected to fail).
static const struct {
  const char* encoded;         // Raw input bytes.
  const char* codepage_name;   // Charset name used to decode |encoded|.
  bool expected_success;       // Expected return value of the conversion.
  const char* expected_value;  // Expected UTF-8 output string.
} kConvertAndNormalizeCases[] = {
  // The same input byte 0xE4 yields a different UTF-8 sequence depending on
  // the codepage: C3 A4 for iso-8859-1, CE B4 for iso-8859-7.
  {"foo-\xe4.html", "iso-8859-1", true, "foo-\xc3\xa4.html"},
  {"foo-\xe4.html", "iso-8859-7", true, "foo-\xce\xb4.html"},

  // Expected failures: "foo-bar" as the codepage name, and byte 0xFF decoded
  // as "ascii". Both expect an empty output string.
  {"foo-\xe4.html", "foo-bar", false, ""},
  {"foo-\xff.html", "ascii", false, ""},

  // Plain ASCII input is expected to come back unchanged.
  {"foo.html", "ascii", true, "foo.html"},

  // UTF-8 input "a" + CC 88 (U+0308) is expected to come back as the single
  // precomposed character C3 A4 (U+00E4).
  {"foo-a\xcc\x88.html", "utf-8", true, "foo-\xc3\xa4.html"},

  // Multi-byte codepages. The gb18030 entry mixes a four-byte and a two-byte
  // input sequence; the expected output is F0 A0 80 80 (U+20000) followed by
  // E4 B8 80 (U+4E00).
  {"\x95\x32\x82\x36\xD2\xBB", "gb18030", true,
   "\xF0\xA0\x80\x80\xE4\xB8\x80"},
  {"\xA7\x41\xA6\x6E", "big5", true, "\xE4\xBD\xA0\xE5\xA5\xBD"},

  // Windows-1258 has a combining character at 0xD2 (U+0309). The sequence
  // (U+00E2, U+0309) can also be encoded as the single code point U+1EA9,
  // which is the form expected here (UTF-8 E1 BA A9).
  {"foo\xE2\xD2", "windows-1258", true, "foo\xE1\xBA\xA9"},
};
| 347 TEST(ICUStringConversionsTest, ConvertToUtf8AndNormalize) { |
| 348 std::string result; |
| 349 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kConvertAndNormalizeCases); ++i) { |
| 350 bool success = ConvertToUtf8AndNormalize( |
| 351 kConvertAndNormalizeCases[i].encoded, |
| 352 kConvertAndNormalizeCases[i].codepage_name, &result); |
| 353 EXPECT_EQ(kConvertAndNormalizeCases[i].expected_success, success); |
| 354 EXPECT_EQ(kConvertAndNormalizeCases[i].expected_value, result); |
| 355 } |
| 356 } |
| 357 |
328 } // namespace base | 358 } // namespace base |
OLD | NEW |