| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include <string> | |
| 6 | |
| 7 #include "base/basictypes.h" | |
| 8 #include "base/strings/string_piece.h" | |
| 9 #include "base/strings/sys_string_conversions.h" | |
| 10 #include "base/strings/utf_string_conversions.h" | |
| 11 #include "base/test/scoped_locale.h" | |
| 12 #include "testing/gtest/include/gtest/gtest.h" | |
| 13 | |
// U+10300 (OLD ITALIC LETTER A) as a wide string: one code unit when wchar_t
// is UTF-32, otherwise the UTF-16 surrogate pair D800 DF00.
#if defined(WCHAR_T_IS_UTF32)
static const std::wstring kSysWideOldItalicLetterA(L"\x10300");
#else
static const std::wstring kSysWideOldItalicLetterA(L"\xd800\xdf00");
#endif
| 19 | |
| 20 namespace base { | |
| 21 | |
| 22 TEST(SysStrings, SysWideToUTF8) { | |
| 23 EXPECT_EQ("Hello, world", SysWideToUTF8(L"Hello, world")); | |
| 24 EXPECT_EQ("\xe4\xbd\xa0\xe5\xa5\xbd", SysWideToUTF8(L"\x4f60\x597d")); | |
| 25 | |
| 26 // >16 bits | |
| 27 EXPECT_EQ("\xF0\x90\x8C\x80", SysWideToUTF8(kSysWideOldItalicLetterA)); | |
| 28 | |
| 29 // Error case. When Windows finds a UTF-16 character going off the end of | |
| 30 // a string, it just converts that literal value to UTF-8, even though this | |
| 31 // is invalid. | |
| 32 // | |
| 33 // This is what XP does, but Vista has different behavior, so we don't bother | |
| 34 // verifying it: | |
| 35 // EXPECT_EQ("\xE4\xBD\xA0\xED\xA0\x80zyxw", | |
| 36 // SysWideToUTF8(L"\x4f60\xd800zyxw")); | |
| 37 | |
| 38 // Test embedded NULLs. | |
| 39 std::wstring wide_null(L"a"); | |
| 40 wide_null.push_back(0); | |
| 41 wide_null.push_back('b'); | |
| 42 | |
| 43 std::string expected_null("a"); | |
| 44 expected_null.push_back(0); | |
| 45 expected_null.push_back('b'); | |
| 46 | |
| 47 EXPECT_EQ(expected_null, SysWideToUTF8(wide_null)); | |
| 48 } | |
| 49 | |
| 50 TEST(SysStrings, SysUTF8ToWide) { | |
| 51 EXPECT_EQ(L"Hello, world", SysUTF8ToWide("Hello, world")); | |
| 52 EXPECT_EQ(L"\x4f60\x597d", SysUTF8ToWide("\xe4\xbd\xa0\xe5\xa5\xbd")); | |
| 53 // >16 bits | |
| 54 EXPECT_EQ(kSysWideOldItalicLetterA, SysUTF8ToWide("\xF0\x90\x8C\x80")); | |
| 55 | |
| 56 // Error case. When Windows finds an invalid UTF-8 character, it just skips | |
| 57 // it. This seems weird because it's inconsistent with the reverse conversion. | |
| 58 // | |
| 59 // This is what XP does, but Vista has different behavior, so we don't bother | |
| 60 // verifying it: | |
| 61 // EXPECT_EQ(L"\x4f60zyxw", SysUTF8ToWide("\xe4\xbd\xa0\xe5\xa5zyxw")); | |
| 62 | |
| 63 // Test embedded NULLs. | |
| 64 std::string utf8_null("a"); | |
| 65 utf8_null.push_back(0); | |
| 66 utf8_null.push_back('b'); | |
| 67 | |
| 68 std::wstring expected_null(L"a"); | |
| 69 expected_null.push_back(0); | |
| 70 expected_null.push_back('b'); | |
| 71 | |
| 72 EXPECT_EQ(expected_null, SysUTF8ToWide(utf8_null)); | |
| 73 } | |
| 74 | |
| 75 #if defined(OS_LINUX) // Tests depend on setting a specific Linux locale. | |
| 76 | |
| 77 TEST(SysStrings, SysWideToNativeMB) { | |
| 78 #if !defined(SYSTEM_NATIVE_UTF8) | |
| 79 ScopedLocale locale("en_US.utf-8"); | |
| 80 #endif | |
| 81 EXPECT_EQ("Hello, world", SysWideToNativeMB(L"Hello, world")); | |
| 82 EXPECT_EQ("\xe4\xbd\xa0\xe5\xa5\xbd", SysWideToNativeMB(L"\x4f60\x597d")); | |
| 83 | |
| 84 // >16 bits | |
| 85 EXPECT_EQ("\xF0\x90\x8C\x80", SysWideToNativeMB(kSysWideOldItalicLetterA)); | |
| 86 | |
| 87 // Error case. When Windows finds a UTF-16 character going off the end of | |
| 88 // a string, it just converts that literal value to UTF-8, even though this | |
| 89 // is invalid. | |
| 90 // | |
| 91 // This is what XP does, but Vista has different behavior, so we don't bother | |
| 92 // verifying it: | |
| 93 // EXPECT_EQ("\xE4\xBD\xA0\xED\xA0\x80zyxw", | |
| 94 // SysWideToNativeMB(L"\x4f60\xd800zyxw")); | |
| 95 | |
| 96 // Test embedded NULLs. | |
| 97 std::wstring wide_null(L"a"); | |
| 98 wide_null.push_back(0); | |
| 99 wide_null.push_back('b'); | |
| 100 | |
| 101 std::string expected_null("a"); | |
| 102 expected_null.push_back(0); | |
| 103 expected_null.push_back('b'); | |
| 104 | |
| 105 EXPECT_EQ(expected_null, SysWideToNativeMB(wide_null)); | |
| 106 } | |
| 107 | |
| 108 // We assume the test is running in a UTF8 locale. | |
| 109 TEST(SysStrings, SysNativeMBToWide) { | |
| 110 #if !defined(SYSTEM_NATIVE_UTF8) | |
| 111 ScopedLocale locale("en_US.utf-8"); | |
| 112 #endif | |
| 113 EXPECT_EQ(L"Hello, world", SysNativeMBToWide("Hello, world")); | |
| 114 EXPECT_EQ(L"\x4f60\x597d", SysNativeMBToWide("\xe4\xbd\xa0\xe5\xa5\xbd")); | |
| 115 // >16 bits | |
| 116 EXPECT_EQ(kSysWideOldItalicLetterA, SysNativeMBToWide("\xF0\x90\x8C\x80")); | |
| 117 | |
| 118 // Error case. When Windows finds an invalid UTF-8 character, it just skips | |
| 119 // it. This seems weird because it's inconsistent with the reverse conversion. | |
| 120 // | |
| 121 // This is what XP does, but Vista has different behavior, so we don't bother | |
| 122 // verifying it: | |
| 123 // EXPECT_EQ(L"\x4f60zyxw", SysNativeMBToWide("\xe4\xbd\xa0\xe5\xa5zyxw")); | |
| 124 | |
| 125 // Test embedded NULLs. | |
| 126 std::string utf8_null("a"); | |
| 127 utf8_null.push_back(0); | |
| 128 utf8_null.push_back('b'); | |
| 129 | |
| 130 std::wstring expected_null(L"a"); | |
| 131 expected_null.push_back(0); | |
| 132 expected_null.push_back('b'); | |
| 133 | |
| 134 EXPECT_EQ(expected_null, SysNativeMBToWide(utf8_null)); | |
| 135 } | |
| 136 | |
// Wide strings used as inputs for the native-multibyte <-> wide round-trip
// test. Each entry is written with hex escapes; the comment above an entry
// shows the text it encodes.
static const wchar_t* const kConvertRoundtripCases[] = {
  L"Google Video",
  // Chinese: "网页 图片 资讯更多 »"
  L"\x7f51\x9875\x0020\x56fe\x7247\x0020\x8d44\x8baf\x66f4\x591a\x0020\x00bb",
  // Greek: "Παγκόσμιος Ιστός"
  L"\x03a0\x03b1\x03b3\x03ba\x03cc\x03c3\x03bc\x03b9"
  L"\x03bf\x03c2\x0020\x0399\x03c3\x03c4\x03cc\x03c2",
  // Russian: "Поиск страниц на русском"
  L"\x041f\x043e\x0438\x0441\x043a\x0020\x0441\x0442"
  L"\x0440\x0430\x043d\x0438\x0446\x0020\x043d\x0430"
  L"\x0020\x0440\x0443\x0441\x0441\x043a\x043e\x043c",
  // Korean: "전체서비스"
  L"\xc804\xccb4\xc11c\xbe44\xc2a4",

  // Test characters that take more than 16 bits. Their spelling depends on
  // whether wchar_t is 16 or 32 bits.
#if defined(WCHAR_T_IS_UTF16)
  // U+10300 as a surrogate pair.
  L"\xd800\xdf00",
  // Mathematical Alphanumeric Symbols U+1D400 - U+1D404 (bold A, B, C, D, E),
  // one surrogate pair each. The previous literals encoded U+11D40 - U+11D44,
  // which lies outside that block.
  L"\xd835\xdc00\xd835\xdc01\xd835\xdc02\xd835\xdc03\xd835\xdc04",
#elif defined(WCHAR_T_IS_UTF32)
  // U+10300 as a single code unit.
  L"\x10300",
  // Mathematical Alphanumeric Symbols U+1D400 - U+1D404 (bold A, B, C, D, E).
  // The previous literals encoded U+11D40 - U+11D44, which lies outside that
  // block.
  L"\x1d400\x1d401\x1d402\x1d403\x1d404",
#endif
};
| 163 | |
| 164 | |
| 165 TEST(SysStrings, SysNativeMBAndWide) { | |
| 166 #if !defined(SYSTEM_NATIVE_UTF8) | |
| 167 ScopedLocale locale("en_US.utf-8"); | |
| 168 #endif | |
| 169 for (size_t i = 0; i < arraysize(kConvertRoundtripCases); ++i) { | |
| 170 std::wstring wide = kConvertRoundtripCases[i]; | |
| 171 std::wstring trip = SysNativeMBToWide(SysWideToNativeMB(wide)); | |
| 172 EXPECT_EQ(wide.size(), trip.size()); | |
| 173 EXPECT_EQ(wide, trip); | |
| 174 } | |
| 175 | |
| 176 // We assume our test is running in UTF-8, so double check through ICU. | |
| 177 for (size_t i = 0; i < arraysize(kConvertRoundtripCases); ++i) { | |
| 178 std::wstring wide = kConvertRoundtripCases[i]; | |
| 179 std::wstring trip = SysNativeMBToWide(WideToUTF8(wide)); | |
| 180 EXPECT_EQ(wide.size(), trip.size()); | |
| 181 EXPECT_EQ(wide, trip); | |
| 182 } | |
| 183 | |
| 184 for (size_t i = 0; i < arraysize(kConvertRoundtripCases); ++i) { | |
| 185 std::wstring wide = kConvertRoundtripCases[i]; | |
| 186 std::wstring trip = UTF8ToWide(SysWideToNativeMB(wide)); | |
| 187 EXPECT_EQ(wide.size(), trip.size()); | |
| 188 EXPECT_EQ(wide, trip); | |
| 189 } | |
| 190 } | |
| 191 #endif // OS_LINUX | |
| 192 | |
| 193 } // namespace base | |
| OLD | NEW |