OLD | NEW |
---|---|
1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include <locale.h> | |
6 | |
7 #include <string> | |
8 | |
9 #include "base/basictypes.h" | |
5 #include "base/string_piece.h" | 10 #include "base/string_piece.h" |
11 #include "base/string_util.h" | |
6 #include "base/sys_string_conversions.h" | 12 #include "base/sys_string_conversions.h" |
7 #include "testing/gtest/include/gtest/gtest.h" | 13 #include "testing/gtest/include/gtest/gtest.h" |
8 | 14 |
9 #ifdef WCHAR_T_IS_UTF32 | 15 #ifdef WCHAR_T_IS_UTF32 |
10 static const std::wstring kSysWideOldItalicLetterA = L"\x10300"; | 16 static const std::wstring kSysWideOldItalicLetterA = L"\x10300"; |
11 #else | 17 #else |
12 static const std::wstring kSysWideOldItalicLetterA = L"\xd800\xdf00"; | 18 static const std::wstring kSysWideOldItalicLetterA = L"\xd800\xdf00"; |
13 #endif | 19 #endif |
14 | 20 |
15 TEST(SysStrings, SysWideToUTF8) { | 21 TEST(SysStrings, SysWideToUTF8) { |
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
59 std::string utf8_null("a"); | 65 std::string utf8_null("a"); |
60 utf8_null.push_back(0); | 66 utf8_null.push_back(0); |
61 utf8_null.push_back('b'); | 67 utf8_null.push_back('b'); |
62 | 68 |
63 std::wstring expected_null(L"a"); | 69 std::wstring expected_null(L"a"); |
64 expected_null.push_back(0); | 70 expected_null.push_back(0); |
65 expected_null.push_back('b'); | 71 expected_null.push_back('b'); |
66 | 72 |
67 EXPECT_EQ(expected_null, SysUTF8ToWide(utf8_null)); | 73 EXPECT_EQ(expected_null, SysUTF8ToWide(utf8_null)); |
68 } | 74 } |
75 | |
76 #if defined(OS_LINUX) // Tests depend on setting a specific Linux locale. | |
77 namespace { | |
78 | |
79 class ScopedSetLocale { | |
80 public: | |
81 ScopedSetLocale(const char* locale) { | |
82 old_locale_ = setlocale(LC_ALL, NULL); | |
83 setlocale(LC_ALL, locale); | |
84 } | |
85 ~ScopedSetLocale() { | |
86 setlocale(LC_ALL, old_locale_.c_str()); | |
87 } | |
88 | |
89 private: | |
90 std::string old_locale_; | |
91 }; | |
92 | |
93 } // namespace | |
94 | |
95 TEST(SysStrings, SysWideToNativeMB) { | |
96 using base::SysWideToNativeMB; | |
97 ScopedSetLocale locale("en_US.utf-8"); | |
98 EXPECT_EQ("Hello, world", SysWideToNativeMB(L"Hello, world")); | |
99 EXPECT_EQ("\xe4\xbd\xa0\xe5\xa5\xbd", SysWideToNativeMB(L"\x4f60\x597d")); | |
100 | |
101 // >16 bits | |
102 EXPECT_EQ("\xF0\x90\x8C\x80", SysWideToNativeMB(kSysWideOldItalicLetterA)); | |
103 | |
104 // Error case. When Windows finds a UTF-16 character going off the end of | |
105 // a string, it just converts that literal value to UTF-8, even though this | |
106 // is invalid. | |
107 // | |
108 // This is what XP does, but Vista has different behavior, so we don't bother | |
109 // verifying it: | |
110 //EXPECT_EQ("\xE4\xBD\xA0\xED\xA0\x80zyxw", | |
111 // SysWideToNativeMB(L"\x4f60\xd800zyxw")); | |
112 | |
113 // Test embedded NULLs. | |
114 std::wstring wide_null(L"a"); | |
115 wide_null.push_back(0); | |
116 wide_null.push_back('b'); | |
117 | |
118 std::string expected_null("a"); | |
119 expected_null.push_back(0); | |
120 expected_null.push_back('b'); | |
121 | |
122 EXPECT_EQ(expected_null, SysWideToNativeMB(wide_null)); | |
123 } | |
124 | |
125 // The UTF-8 locale these expectations rely on is requested via ScopedSetLocale. | |
126 TEST(SysStrings, SysNativeMBToWide) { | |
127 using base::SysNativeMBToWide; | |
128 ScopedSetLocale locale("en_US.utf-8"); | |
129 EXPECT_EQ(L"Hello, world", SysNativeMBToWide("Hello, world")); | |
130 EXPECT_EQ(L"\x4f60\x597d", SysNativeMBToWide("\xe4\xbd\xa0\xe5\xa5\xbd")); | |
131 // >16 bits | |
132 EXPECT_EQ(kSysWideOldItalicLetterA, SysNativeMBToWide("\xF0\x90\x8C\x80")); | |
133 | |
134 // Error case. When Windows finds an invalid UTF-8 character, it just skips | |
135 // it. This seems weird because it's inconsistent with the reverse conversion. | |
136 // | |
137 // This is what XP does, but Vista has different behavior, so we don't bother | |
138 // verifying it: | |
139 //EXPECT_EQ(L"\x4f60zyxw", SysNativeMBToWide("\xe4\xbd\xa0\xe5\xa5zyxw")); | |
140 | |
141 // Test embedded NULLs. | |
142 std::string utf8_null("a"); | |
143 utf8_null.push_back(0); | |
144 utf8_null.push_back('b'); | |
145 | |
146 std::wstring expected_null(L"a"); | |
147 expected_null.push_back(0); | |
148 expected_null.push_back('b'); | |
149 | |
150 EXPECT_EQ(expected_null, SysNativeMBToWide(utf8_null)); | |
151 } | |
152 | |
153 static const wchar_t* const kConvertRoundtripCases[] = { | |
154 L"Google Video", | |
155 // "网页 图片 资讯更多 »" | |
156 L"\x7f51\x9875\x0020\x56fe\x7247\x0020\x8d44\x8baf\x66f4\x591a\x0020\x00bb", | |
157 // "Παγκόσμιος Ιστός" | |
158 L"\x03a0\x03b1\x03b3\x03ba\x03cc\x03c3\x03bc\x03b9" | |
159 L"\x03bf\x03c2\x0020\x0399\x03c3\x03c4\x03cc\x03c2", | |
160 // "Поиск страниц на русском" | |
161 L"\x041f\x043e\x0438\x0441\x043a\x0020\x0441\x0442" | |
162 L"\x0440\x0430\x043d\x0438\x0446\x0020\x043d\x0430" | |
163 L"\x0020\x0440\x0443\x0441\x0441\x043a\x043e\x043c", | |
164 // "전체서비스" | |
165 L"\xc804\xccb4\xc11c\xbe44\xc2a4", | |
166 | |
167 // Test characters that take more than 16 bits. This will depend on whether | |
168 // wchar_t is 16 or 32 bits. | |
169 #if defined(WCHAR_T_IS_UTF16) | |
170 L"\xd800\xdf00", | |
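171 // Five supplementary-plane characters, U+11D40 - U+11D44, as UTF-16 surrogate pairs. (Labelled "Mathematical Alphanumeric Symbols A,B,C,D,E", but that block starts at U+1D400.) | |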
172 L"\xd807\xdd40\xd807\xdd41\xd807\xdd42\xd807\xdd43\xd807\xdd44", | |
173 #elif defined(WCHAR_T_IS_UTF32) | |
174 L"\x10300", | |
175 // Five supplementary-plane characters, U+11D40 - U+11D44, as UTF-32 code units. (Labelled "Mathematical Alphanumeric Symbols A,B,C,D,E", but that block starts at U+1D400.) | |
176 L"\x11d40\x11d41\x11d42\x11d43\x11d44", | |
177 #endif | |
178 }; | |
179 | |
180 | |
181 TEST(SysStrings, SysNativeMBAndWide) { | |
182 ScopedSetLocale locale("en_US.utf-8"); | |
183 for (size_t i = 0; i < arraysize(kConvertRoundtripCases); ++i) { | |
184 std::wstring wide = kConvertRoundtripCases[i]; | |
185 std::wstring trip = base::SysNativeMBToWide(base::SysWideToNativeMB(wide)); | |
186 EXPECT_EQ(wide.size(), trip.size()); | |
187 EXPECT_EQ(wide, trip); | |
188 } | |
189 | |
190 // The locale requested above is UTF-8, so cross-check the native conversions against the ICU-based UTF-8 conversions. | |
Evan Martin
2009/06/26 15:19:26
This comment is obsolete because you've ensured it
Dean McNamee
2009/06/26 15:23:07
The comment isn't obsolete. The point was we can
| |
191 for (size_t i = 0; i < arraysize(kConvertRoundtripCases); ++i) { | |
192 std::wstring wide = kConvertRoundtripCases[i]; | |
193 std::wstring trip = base::SysNativeMBToWide(WideToUTF8(wide)); | |
194 EXPECT_EQ(wide.size(), trip.size()); | |
195 EXPECT_EQ(wide, trip); | |
196 } | |
197 | |
198 for (size_t i = 0; i < arraysize(kConvertRoundtripCases); ++i) { | |
199 std::wstring wide = kConvertRoundtripCases[i]; | |
200 std::wstring trip = UTF8ToWide(base::SysWideToNativeMB(wide)); | |
201 EXPECT_EQ(wide.size(), trip.size()); | |
202 EXPECT_EQ(wide, trip); | |
203 } | |
204 } | |
205 #endif // OS_LINUX | |
OLD | NEW |