Index: base/i18n/icu_string_conversions_unittest.cc |
=================================================================== |
--- base/i18n/icu_string_conversions_unittest.cc (revision 64006) |
+++ base/i18n/icu_string_conversions_unittest.cc (working copy) |
@@ -11,6 +11,7 @@ |
#include "base/basictypes.h" |
#include "base/i18n/icu_string_conversions.h" |
#include "base/logging.h" |
+#include "base/string_piece.h" |
#include "base/utf_string_conversions.h" |
#include "testing/gtest/include/gtest/gtest.h" |
@@ -325,4 +326,33 @@ |
} |
} |
+static const struct { /* One row per ConvertToUtf8AndNormalize test case. */ |
+ const char* encoded; /* Input bytes, interpreted in codepage_name. */ |
+ const char* codepage_name; /* Charset name handed to the conversion call. */ |
+ bool expected_success; /* Return value the call must produce. */ |
+ const char* expected_value; /* Expected UTF-8 output; "" on the failure rows. */ |
+} kConvertAndNormalizeCases[] = { |
+ {"foo-\xe4.html", "iso-8859-1", true, "foo-\xc3\xa4.html"}, /* 0xE4 -> U+00E4 */ |
+ {"foo-\xe4.html", "iso-8859-7", true, "foo-\xce\xb4.html"}, /* 0xE4 -> U+03B4 */ |
+ {"foo-\xe4.html", "foo-bar", false, ""}, /* presumably rejected as an unknown charset name */ |
+ {"foo-\xff.html", "ascii", false, ""}, /* 0xFF is outside 7-bit ASCII */ |
+ {"foo.html", "ascii", true, "foo.html"}, /* pure ASCII comes back unchanged */ |
+ {"foo-a\xcc\x88.html", "utf-8", true, "foo-\xc3\xa4.html"}, /* "a" + U+0308 -> precomposed U+00E4 */ |
+ {"\x95\x32\x82\x36\xD2\xBB", "gb18030", true, "\xF0\xA0\x80\x80\xE4\xB8\x80"}, /* U+20000, U+4E00 */ |
+ {"\xA7\x41\xA6\x6E", "big5", true, "\xE4\xBD\xA0\xE5\xA5\xBD"}, /* U+4F60, U+597D */ |
+ // Windows-1258 has a combining character at 0xD2 (U+0309), so 0xE2 0xD2 |
+ // decodes to (U+00E2, U+0309); the expected output is the single U+1EA9. |
+ {"foo\xE2\xD2", "windows-1258", true, "foo\xE1\xBA\xA9"}, |
+}; |
+TEST(ICUStringConversionsTest, ConvertToUtf8AndNormalize) { /* Checks every kConvertAndNormalizeCases row. */ |
+ std::string result; /* Shared across rows: a failure row passes only if the call leaves it empty. */ |
+ for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kConvertAndNormalizeCases); ++i) { |
+ bool success = ConvertToUtf8AndNormalize( |
+ kConvertAndNormalizeCases[i].encoded, |
+ kConvertAndNormalizeCases[i].codepage_name, &result); |
+ EXPECT_EQ(kConvertAndNormalizeCases[i].expected_success, success); /* return value matches the row */ |
+ EXPECT_EQ(kConvertAndNormalizeCases[i].expected_value, result); /* compared on failure rows too */ |
+ } |
+} |
+ |
} // namespace base |