OLD | NEW |
| (Empty) |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 | |
6 #include <string> | |
7 | |
8 #include "base/basictypes.h" | |
9 #include "base/string_piece.h" | |
10 #include "base/test/scoped_locale.h" | |
11 #include "base/utf_string_conversions.h" | |
12 #include "base/sys_string_conversions.h" | |
13 #include "testing/gtest/include/gtest/gtest.h" | |
14 | |
// U+10300 (Old Italic Letter A) spelled in the platform's wide encoding. The
// code point does not fit in 16 bits, so it is a single unit when wchar_t is
// UTF-32 and a surrogate pair otherwise.
#if !defined(WCHAR_T_IS_UTF32)
static const std::wstring kSysWideOldItalicLetterA(L"\xd800\xdf00");
#else
static const std::wstring kSysWideOldItalicLetterA(L"\x10300");
#endif
20 | |
21 TEST(SysStrings, SysWideToUTF8) { | |
22 using base::SysWideToUTF8; | |
23 EXPECT_EQ("Hello, world", SysWideToUTF8(L"Hello, world")); | |
24 EXPECT_EQ("\xe4\xbd\xa0\xe5\xa5\xbd", SysWideToUTF8(L"\x4f60\x597d")); | |
25 | |
26 // >16 bits | |
27 EXPECT_EQ("\xF0\x90\x8C\x80", SysWideToUTF8(kSysWideOldItalicLetterA)); | |
28 | |
29 // Error case. When Windows finds a UTF-16 character going off the end of | |
30 // a string, it just converts that literal value to UTF-8, even though this | |
31 // is invalid. | |
32 // | |
33 // This is what XP does, but Vista has different behavior, so we don't bother | |
34 // verifying it: | |
35 // EXPECT_EQ("\xE4\xBD\xA0\xED\xA0\x80zyxw", | |
36 // SysWideToUTF8(L"\x4f60\xd800zyxw")); | |
37 | |
38 // Test embedded NULLs. | |
39 std::wstring wide_null(L"a"); | |
40 wide_null.push_back(0); | |
41 wide_null.push_back('b'); | |
42 | |
43 std::string expected_null("a"); | |
44 expected_null.push_back(0); | |
45 expected_null.push_back('b'); | |
46 | |
47 EXPECT_EQ(expected_null, SysWideToUTF8(wide_null)); | |
48 } | |
49 | |
50 TEST(SysStrings, SysUTF8ToWide) { | |
51 using base::SysUTF8ToWide; | |
52 EXPECT_EQ(L"Hello, world", SysUTF8ToWide("Hello, world")); | |
53 EXPECT_EQ(L"\x4f60\x597d", SysUTF8ToWide("\xe4\xbd\xa0\xe5\xa5\xbd")); | |
54 // >16 bits | |
55 EXPECT_EQ(kSysWideOldItalicLetterA, SysUTF8ToWide("\xF0\x90\x8C\x80")); | |
56 | |
57 // Error case. When Windows finds an invalid UTF-8 character, it just skips | |
58 // it. This seems weird because it's inconsistent with the reverse conversion. | |
59 // | |
60 // This is what XP does, but Vista has different behavior, so we don't bother | |
61 // verifying it: | |
62 // EXPECT_EQ(L"\x4f60zyxw", SysUTF8ToWide("\xe4\xbd\xa0\xe5\xa5zyxw")); | |
63 | |
64 // Test embedded NULLs. | |
65 std::string utf8_null("a"); | |
66 utf8_null.push_back(0); | |
67 utf8_null.push_back('b'); | |
68 | |
69 std::wstring expected_null(L"a"); | |
70 expected_null.push_back(0); | |
71 expected_null.push_back('b'); | |
72 | |
73 EXPECT_EQ(expected_null, SysUTF8ToWide(utf8_null)); | |
74 } | |
75 | |
76 #if defined(OS_LINUX) // Tests depend on setting a specific Linux locale. | |
77 | |
78 TEST(SysStrings, SysWideToNativeMB) { | |
79 using base::SysWideToNativeMB; | |
80 base::ScopedLocale locale("en_US.utf-8"); | |
81 EXPECT_EQ("Hello, world", SysWideToNativeMB(L"Hello, world")); | |
82 EXPECT_EQ("\xe4\xbd\xa0\xe5\xa5\xbd", SysWideToNativeMB(L"\x4f60\x597d")); | |
83 | |
84 // >16 bits | |
85 EXPECT_EQ("\xF0\x90\x8C\x80", SysWideToNativeMB(kSysWideOldItalicLetterA)); | |
86 | |
87 // Error case. When Windows finds a UTF-16 character going off the end of | |
88 // a string, it just converts that literal value to UTF-8, even though this | |
89 // is invalid. | |
90 // | |
91 // This is what XP does, but Vista has different behavior, so we don't bother | |
92 // verifying it: | |
93 // EXPECT_EQ("\xE4\xBD\xA0\xED\xA0\x80zyxw", | |
94 // SysWideToNativeMB(L"\x4f60\xd800zyxw")); | |
95 | |
96 // Test embedded NULLs. | |
97 std::wstring wide_null(L"a"); | |
98 wide_null.push_back(0); | |
99 wide_null.push_back('b'); | |
100 | |
101 std::string expected_null("a"); | |
102 expected_null.push_back(0); | |
103 expected_null.push_back('b'); | |
104 | |
105 EXPECT_EQ(expected_null, SysWideToNativeMB(wide_null)); | |
106 } | |
107 | |
108 // We assume the test is running in a UTF8 locale. | |
109 TEST(SysStrings, SysNativeMBToWide) { | |
110 using base::SysNativeMBToWide; | |
111 base::ScopedLocale locale("en_US.utf-8"); | |
112 EXPECT_EQ(L"Hello, world", SysNativeMBToWide("Hello, world")); | |
113 EXPECT_EQ(L"\x4f60\x597d", SysNativeMBToWide("\xe4\xbd\xa0\xe5\xa5\xbd")); | |
114 // >16 bits | |
115 EXPECT_EQ(kSysWideOldItalicLetterA, SysNativeMBToWide("\xF0\x90\x8C\x80")); | |
116 | |
117 // Error case. When Windows finds an invalid UTF-8 character, it just skips | |
118 // it. This seems weird because it's inconsistent with the reverse conversion. | |
119 // | |
120 // This is what XP does, but Vista has different behavior, so we don't bother | |
121 // verifying it: | |
122 // EXPECT_EQ(L"\x4f60zyxw", SysNativeMBToWide("\xe4\xbd\xa0\xe5\xa5zyxw")); | |
123 | |
124 // Test embedded NULLs. | |
125 std::string utf8_null("a"); | |
126 utf8_null.push_back(0); | |
127 utf8_null.push_back('b'); | |
128 | |
129 std::wstring expected_null(L"a"); | |
130 expected_null.push_back(0); | |
131 expected_null.push_back('b'); | |
132 | |
133 EXPECT_EQ(expected_null, SysNativeMBToWide(utf8_null)); | |
134 } | |
135 | |
// Wide-string fixtures for the wide <-> native multibyte round-trip test.
static const wchar_t* const kConvertRoundtripCases[] = {
  // ASCII only.
  L"Google Video",

  // Chinese: "网页 图片 资讯更多 »"
  L"\x7f51\x9875\x0020\x56fe\x7247\x0020"
  L"\x8d44\x8baf\x66f4\x591a\x0020\x00bb",

  // Greek: "Παγκόσμιος Ιστός"
  L"\x03a0\x03b1\x03b3\x03ba\x03cc\x03c3\x03bc\x03b9\x03bf\x03c2"
  L"\x0020"
  L"\x0399\x03c3\x03c4\x03cc\x03c2",

  // Russian: "Поиск страниц на русском"
  L"\x041f\x043e\x0438\x0441\x043a"
  L"\x0020"
  L"\x0441\x0442\x0440\x0430\x043d\x0438\x0446"
  L"\x0020"
  L"\x043d\x0430"
  L"\x0020"
  L"\x0440\x0443\x0441\x0441\x043a\x043e\x043c",

  // Korean: "전체서비스"
  L"\xc804\xccb4\xc11c\xbe44\xc2a4",

  // Code points that need more than 16 bits. Their spelling depends on
  // whether wchar_t holds UTF-16 (surrogate pairs) or UTF-32 (one unit each).
  // In both spellings the first entry is U+10300 and the second is the run
  // U+11D40..U+11D44.
  // NOTE(review): an earlier comment called that run "Mathematical
  // Alphanumeric Symbols A,B,C,D,E", but that block is U+1D400..U+1D7FF, so
  // these values are not in it. The data is left unchanged.
#if defined(WCHAR_T_IS_UTF16)
  L"\xd800\xdf00",
  L"\xd807\xdd40" L"\xd807\xdd41" L"\xd807\xdd42" L"\xd807\xdd43"
  L"\xd807\xdd44",
#elif defined(WCHAR_T_IS_UTF32)
  L"\x10300",
  L"\x11d40" L"\x11d41" L"\x11d42" L"\x11d43" L"\x11d44",
#endif
};
162 | |
163 | |
164 TEST(SysStrings, SysNativeMBAndWide) { | |
165 base::ScopedLocale locale("en_US.utf-8"); | |
166 for (size_t i = 0; i < arraysize(kConvertRoundtripCases); ++i) { | |
167 std::wstring wide = kConvertRoundtripCases[i]; | |
168 std::wstring trip = base::SysNativeMBToWide(base::SysWideToNativeMB(wide)); | |
169 EXPECT_EQ(wide.size(), trip.size()); | |
170 EXPECT_EQ(wide, trip); | |
171 } | |
172 | |
173 // We assume our test is running in UTF-8, so double check through ICU. | |
174 for (size_t i = 0; i < arraysize(kConvertRoundtripCases); ++i) { | |
175 std::wstring wide = kConvertRoundtripCases[i]; | |
176 std::wstring trip = base::SysNativeMBToWide(WideToUTF8(wide)); | |
177 EXPECT_EQ(wide.size(), trip.size()); | |
178 EXPECT_EQ(wide, trip); | |
179 } | |
180 | |
181 for (size_t i = 0; i < arraysize(kConvertRoundtripCases); ++i) { | |
182 std::wstring wide = kConvertRoundtripCases[i]; | |
183 std::wstring trip = UTF8ToWide(base::SysWideToNativeMB(wide)); | |
184 EXPECT_EQ(wide.size(), trip.size()); | |
185 EXPECT_EQ(wide, trip); | |
186 } | |
187 } | |
188 #endif // OS_LINUX | |
OLD | NEW |