OLD | NEW |
1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "base/basictypes.h" | 5 #include "base/basictypes.h" |
6 #include "base/string_util.h" | 6 #include "base/string_util.h" |
7 #include "testing/gtest/include/gtest/gtest.h" | 7 #include "testing/gtest/include/gtest/gtest.h" |
8 | 8 |
9 namespace base { | 9 namespace base { |
10 | 10 |
(...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
87 struct UTF8ToWideCase { | 87 struct UTF8ToWideCase { |
88 const char* utf8; | 88 const char* utf8; |
89 const wchar_t* wide; | 89 const wchar_t* wide; |
90 bool success; | 90 bool success; |
91 } convert_cases[] = { | 91 } convert_cases[] = { |
92 // Regular UTF-8 input. | 92 // Regular UTF-8 input. |
93 {"\xe4\xbd\xa0\xe5\xa5\xbd", L"\x4f60\x597d", true}, | 93 {"\xe4\xbd\xa0\xe5\xa5\xbd", L"\x4f60\x597d", true}, |
94 // Non-character is passed through. | 94 // Non-character is passed through. |
95 {"\xef\xbf\xbfHello", L"\xffffHello", true}, | 95 {"\xef\xbf\xbfHello", L"\xffffHello", true}, |
96 // Truncated UTF-8 sequence. | 96 // Truncated UTF-8 sequence. |
97 {"\xe4\xa0\xe5\xa5\xbd", L"\x597d", false}, | 97 {"\xe4\xa0\xe5\xa5\xbd", L"\xfffd\x597d", false}, |
98 // Truncated off the end. | 98 // Truncated off the end. |
99 {"\xe5\xa5\xbd\xe4\xa0", L"\x597d", false}, | 99 {"\xe5\xa5\xbd\xe4\xa0", L"\x597d\xfffd", false}, |
100 // Non-shortest-form UTF-8. | 100 // Non-shortest-form UTF-8. |
101 {"\xf0\x84\xbd\xa0\xe5\xa5\xbd", L"\x597d", false}, | 101 {"\xf0\x84\xbd\xa0\xe5\xa5\xbd", L"\xfffd\x597d", false}, |
102 // This UTF-8 character decodes to a UTF-16 surrogate, which is illegal. | 102 // This UTF-8 character decodes to a UTF-16 surrogate, which is illegal. |
103 {"\xed\xb0\x80", L"", false}, | 103 {"\xed\xb0\x80", L"\xfffd", false}, |
104 // Non-BMP characters. The second is a non-character regarded as valid. | 104 // Non-BMP characters. The second is a non-character regarded as valid. |
105 // The result will either be in UTF-16 or UTF-32. | 105 // The result will either be in UTF-16 or UTF-32. |
106 #if defined(WCHAR_T_IS_UTF16) | 106 #if defined(WCHAR_T_IS_UTF16) |
107 {"A\xF0\x90\x8C\x80z", L"A\xd800\xdf00z", true}, | 107 {"A\xF0\x90\x8C\x80z", L"A\xd800\xdf00z", true}, |
108 {"A\xF4\x8F\xBF\xBEz", L"A\xdbff\xdffez", true}, | 108 {"A\xF4\x8F\xBF\xBEz", L"A\xdbff\xdffez", true}, |
109 #elif defined(WCHAR_T_IS_UTF32) | 109 #elif defined(WCHAR_T_IS_UTF32) |
110 {"A\xF0\x90\x8C\x80z", L"A\x10300z", true}, | 110 {"A\xF0\x90\x8C\x80z", L"A\x10300z", true}, |
111 {"A\xF4\x8F\xBF\xBEz", L"A\x10fffez", true}, | 111 {"A\xF4\x8F\xBF\xBEz", L"A\x10fffez", true}, |
112 #endif | 112 #endif |
113 }; | 113 }; |
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
145 bool success; | 145 bool success; |
146 } convert_cases[] = { | 146 } convert_cases[] = { |
147 // Regular UTF-16 input. | 147 // Regular UTF-16 input. |
148 {L"\x4f60\x597d", "\xe4\xbd\xa0\xe5\xa5\xbd", true}, | 148 {L"\x4f60\x597d", "\xe4\xbd\xa0\xe5\xa5\xbd", true}, |
149 // Test a non-BMP character. | 149 // Test a non-BMP character. |
150 {L"\xd800\xdf00", "\xF0\x90\x8C\x80", true}, | 150 {L"\xd800\xdf00", "\xF0\x90\x8C\x80", true}, |
151 // Non-characters are passed through. | 151 // Non-characters are passed through. |
152 {L"\xffffHello", "\xEF\xBF\xBFHello", true}, | 152 {L"\xffffHello", "\xEF\xBF\xBFHello", true}, |
153 {L"\xdbff\xdffeHello", "\xF4\x8F\xBF\xBEHello", true}, | 153 {L"\xdbff\xdffeHello", "\xF4\x8F\xBF\xBEHello", true}, |
154 // The first character is a truncated UTF-16 character. | 154 // The first character is a truncated UTF-16 character. |
155 {L"\xd800\x597d", "\xe5\xa5\xbd", false}, | 155 {L"\xd800\x597d", "\xef\xbf\xbd\xe5\xa5\xbd", false}, |
156 // Truncated at the end. | 156 // Truncated at the end. |
157 {L"\x597d\xd800", "\xe5\xa5\xbd", false}, | 157 {L"\x597d\xd800", "\xe5\xa5\xbd\xef\xbf\xbd", false}, |
158 }; | 158 }; |
159 | 159 |
160 for (int i = 0; i < arraysize(convert_cases); i++) { | 160 for (int i = 0; i < arraysize(convert_cases); i++) { |
161 std::string converted; | 161 std::string converted; |
162 EXPECT_EQ(convert_cases[i].success, | 162 EXPECT_EQ(convert_cases[i].success, |
163 WideToUTF8(convert_cases[i].utf16, | 163 WideToUTF8(convert_cases[i].utf16, |
164 wcslen(convert_cases[i].utf16), | 164 wcslen(convert_cases[i].utf16), |
165 &converted)); | 165 &converted)); |
166 std::string expected(convert_cases[i].utf8); | 166 std::string expected(convert_cases[i].utf8); |
167 EXPECT_EQ(expected, converted); | 167 EXPECT_EQ(expected, converted); |
168 } | 168 } |
169 } | 169 } |
170 | 170 |
171 #elif defined(WCHAR_T_IS_UTF32) | 171 #elif defined(WCHAR_T_IS_UTF32) |
172 // This test is only valid when wchar_t == UTF-32. | 172 // This test is only valid when wchar_t == UTF-32. |
173 TEST(UTFStringConversionsTest, ConvertUTF32ToUTF8) { | 173 TEST(UTFStringConversionsTest, ConvertUTF32ToUTF8) { |
174 struct WideToUTF8Case { | 174 struct WideToUTF8Case { |
175 const wchar_t* utf32; | 175 const wchar_t* utf32; |
176 const char* utf8; | 176 const char* utf8; |
177 bool success; | 177 bool success; |
178 } convert_cases[] = { | 178 } convert_cases[] = { |
179 // Regular 16-bit input. | 179 // Regular 16-bit input. |
180 {L"\x4f60\x597d", "\xe4\xbd\xa0\xe5\xa5\xbd", true}, | 180 {L"\x4f60\x597d", "\xe4\xbd\xa0\xe5\xa5\xbd", true}, |
181 // Test a non-BMP character. | 181 // Test a non-BMP character. |
182 {L"A\x10300z", "A\xF0\x90\x8C\x80z", true}, | 182 {L"A\x10300z", "A\xF0\x90\x8C\x80z", true}, |
183 // Non-characters are passed through. | 183 // Non-characters are passed through. |
184 {L"\xffffHello", "\xEF\xBF\xBFHello", true}, | 184 {L"\xffffHello", "\xEF\xBF\xBFHello", true}, |
185 {L"\x10fffeHello", "\xF4\x8F\xBF\xBEHello", true}, | 185 {L"\x10fffeHello", "\xF4\x8F\xBF\xBEHello", true}, |
186 // Invalid Unicode code points. | 186 // Invalid Unicode code points. |
187 {L"\xfffffffHello", "Hello", false}, | 187 {L"\xfffffffHello", "\xEF\xBF\xBDHello", false}, |
188 // Lone UTF-16 surrogate code points are not valid UTF-32 characters. | 188 // Lone UTF-16 surrogate code points are not valid UTF-32 characters. |
189 {L"\xd800\x597d", "\xe5\xa5\xbd", false}, | 189 {L"\xd800\x597d", "\xef\xbf\xbd\xe5\xa5\xbd", false}, |
190 {L"\xdc01Hello", "Hello", false}, | 190 {L"\xdc01Hello", "\xef\xbf\xbdHello", false}, |
191 }; | 191 }; |
192 | 192 |
193 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(convert_cases); i++) { | 193 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(convert_cases); i++) { |
194 std::string converted; | 194 std::string converted; |
195 EXPECT_EQ(convert_cases[i].success, | 195 EXPECT_EQ(convert_cases[i].success, |
196 WideToUTF8(convert_cases[i].utf32, | 196 WideToUTF8(convert_cases[i].utf32, |
197 wcslen(convert_cases[i].utf32), | 197 wcslen(convert_cases[i].utf32), |
198 &converted)); | 198 &converted)); |
199 std::string expected(convert_cases[i].utf8); | 199 std::string expected(convert_cases[i].utf8); |
200 EXPECT_EQ(expected, converted); | 200 EXPECT_EQ(expected, converted); |
(...skipping 19 matching lines...) Expand all Loading... |
220 EXPECT_EQ(arraysize(wmulti) - 1, wmultistring.length()); | 220 EXPECT_EQ(arraysize(wmulti) - 1, wmultistring.length()); |
221 std::string expected; | 221 std::string expected; |
222 memcpy(WriteInto(&expected, arraysize(multi)), multi, sizeof(multi)); | 222 memcpy(WriteInto(&expected, arraysize(multi)), multi, sizeof(multi)); |
223 EXPECT_EQ(arraysize(multi) - 1, expected.length()); | 223 EXPECT_EQ(arraysize(multi) - 1, expected.length()); |
224 const std::string& converted = WideToUTF8(wmultistring); | 224 const std::string& converted = WideToUTF8(wmultistring); |
225 EXPECT_EQ(arraysize(multi) - 1, converted.length()); | 225 EXPECT_EQ(arraysize(multi) - 1, converted.length()); |
226 EXPECT_EQ(expected, converted); | 226 EXPECT_EQ(expected, converted); |
227 } | 227 } |
228 | 228 |
229 } // namespace base | 229 } // namespace base |
OLD | NEW |