OLD | NEW |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include <math.h> | 5 #include <math.h> |
6 #include <stdarg.h> | 6 #include <stdarg.h> |
7 | 7 |
8 #include <limits> | 8 #include <limits> |
9 #include <sstream> | 9 #include <sstream> |
10 | 10 |
(...skipping 325 matching lines...)
336 | 336 |
// Test table for ConvertToUtf8AndNormalize, iterated by the
// ICUStringConversionsTest.ConvertToUtf8AndNormalize test that follows it.
//
// NOTE: this text is a side-by-side diff rendering. Each row shows the OLD
// line, a " | " separator, then the NEW line; the leading numbers are the
// line numbers in the respective version of the file.
//
// Fields of each entry:
//   encoded          - input bytes handed to the converter.
//   codepage_name    - name of the charset the input is declared to be in.
//   expected_success - return value the converter is expected to produce.
//   expected_value   - expected UTF-8 output; "" for the failure cases.
//
// The only difference between OLD and NEW is the {"foo-\xff.html", "ascii"}
// entry: OLD expected the conversion to fail with an empty result, NEW
// expects success with "foo-\xc3\xbf.html" (0xFF mapped to U+00FF).
337 static const struct { | 337 static const struct {  |
338 const char* encoded; | 338 const char* encoded; |
339 const char* codepage_name; | 339 const char* codepage_name; |
340 bool expected_success; | 340 bool expected_success; |
341 const char* expected_value; | 341 const char* expected_value; |
342 } kConvertAndNormalizeCases[] = { | 342 } kConvertAndNormalizeCases[] = { |
// The same input byte 0xE4 yields different characters depending on the
// declared codepage, and an unknown codepage name is expected to fail.
343 {"foo-\xe4.html", "iso-8859-1", true, "foo-\xc3\xa4.html"}, | 343 {"foo-\xe4.html", "iso-8859-1", true, "foo-\xc3\xa4.html"}, |
344 {"foo-\xe4.html", "iso-8859-7", true, "foo-\xce\xb4.html"}, | 344 {"foo-\xe4.html", "iso-8859-7", true, "foo-\xce\xb4.html"}, |
345 {"foo-\xe4.html", "foo-bar", false, ""}, | 345 {"foo-\xe4.html", "foo-bar", false, ""}, |
346 {"foo-\xff.html", "ascii", false, ""}, | 346 // HTML Encoding spec treats US-ASCII as synonymous with windows-1252 |
| 347 {"foo-\xff.html", "ascii", true, "foo-\xc3\xbf.html"}, |
347 {"foo.html", "ascii", true, "foo.html"}, | 348 {"foo.html", "ascii", true, "foo.html"}, |
// UTF-8 input using 'a' + combining diaeresis (U+0308) is expected to come
// back as the single precomposed character U+00E4.
348 {"foo-a\xcc\x88.html", "utf-8", true, "foo-\xc3\xa4.html"}, | 349 {"foo-a\xcc\x88.html", "utf-8", true, "foo-\xc3\xa4.html"}, |
349 {"\x95\x32\x82\x36\xD2\xBB", "gb18030", true, "\xF0\xA0\x80\x80\xE4\xB8\x80"}, | 350 {"\x95\x32\x82\x36\xD2\xBB", "gb18030", true, "\xF0\xA0\x80\x80\xE4\xB8\x80"}, |
350 {"\xA7\x41\xA6\x6E", "big5", true, "\xE4\xBD\xA0\xE5\xA5\xBD"}, | 351 {"\xA7\x41\xA6\x6E", "big5", true, "\xE4\xBD\xA0\xE5\xA5\xBD"}, |
351 // Windows-1258 does have a combining character at xD2 (which is U+0309). | 352 // Windows-1258 does have a combining character at xD2 (which is U+0309). |
352 // The sequence of (U+00E2, U+0309) is also encoded as U+1EA9. | 353 // The sequence of (U+00E2, U+0309) is also encoded as U+1EA9. |
353 {"foo\xE2\xD2", "windows-1258", true, "foo\xE1\xBA\xA9"}, | 354 {"foo\xE2\xD2", "windows-1258", true, "foo\xE1\xBA\xA9"}, |
// Empty input is expected to convert successfully to an empty string.
354 {"", "iso-8859-1", true, ""}, | 355 {"", "iso-8859-1", true, ""}, |
355 }; | 356 }; |
// Runs every entry of kConvertAndNormalizeCases through
// ConvertToUtf8AndNormalize and checks both the returned success flag and
// the produced string against the table.
//
// NOTE: this text is a side-by-side diff rendering (OLD line | NEW line, each
// prefixed with its line number); the test body is the same on both sides.
356 TEST(ICUStringConversionsTest, ConvertToUtf8AndNormalize) { | 357 TEST(ICUStringConversionsTest, ConvertToUtf8AndNormalize) { |
// |result| is declared once and reused by every iteration, so the failure
// cases (expected_value == "") only pass if a failed call leaves it empty
// rather than holding the previous iteration's output.
357 std::string result; | 358 std::string result; |
358 for (size_t i = 0; i < arraysize(kConvertAndNormalizeCases); ++i) { | 359 for (size_t i = 0; i < arraysize(kConvertAndNormalizeCases); ++i) { |
// Tag any failure below with the case index, input and codepage name.
359 SCOPED_TRACE(base::StringPrintf( | 360 SCOPED_TRACE(base::StringPrintf( |
360 "Test[%" PRIuS "]: <encoded: %s> <codepage: %s>", i, | 361 "Test[%" PRIuS "]: <encoded: %s> <codepage: %s>", i, |
361 kConvertAndNormalizeCases[i].encoded, | 362 kConvertAndNormalizeCases[i].encoded, |
362 kConvertAndNormalizeCases[i].codepage_name)); | 363 kConvertAndNormalizeCases[i].codepage_name)); |
363 | 364 |
364 bool success = ConvertToUtf8AndNormalize( | 365 bool success = ConvertToUtf8AndNormalize( |
365 kConvertAndNormalizeCases[i].encoded, | 366 kConvertAndNormalizeCases[i].encoded, |
366 kConvertAndNormalizeCases[i].codepage_name, &result); | 367 kConvertAndNormalizeCases[i].codepage_name, &result); |
// Both checks use EXPECT_EQ, so the loop continues after a mismatch.
367 EXPECT_EQ(kConvertAndNormalizeCases[i].expected_success, success); | 368 EXPECT_EQ(kConvertAndNormalizeCases[i].expected_success, success); |
368 EXPECT_EQ(kConvertAndNormalizeCases[i].expected_value, result); | 369 EXPECT_EQ(kConvertAndNormalizeCases[i].expected_value, result); |
369 } | 370 } |
370 } | 371 } |
371 | 372 |
372 } // namespace base | 373 } // namespace base |
OLD | NEW |