OLD | NEW |
1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include <math.h> | |
6 #include <stdarg.h> | |
7 | |
8 #include <limits> | |
9 #include <sstream> | |
10 | |
11 #include "base/basictypes.h" | 5 #include "base/basictypes.h" |
12 #include "base/string_util.h" | 6 #include "base/string_util.h" |
13 #include "testing/gtest/include/gtest/gtest.h" | 7 #include "testing/gtest/include/gtest/gtest.h" |
14 | 8 |
15 namespace base { | 9 namespace base { |
16 | 10 |
17 namespace { | 11 namespace { |
18 | 12 |
19 // Given a null-terminated string of wchar_t with each wchar_t representing | 13 // Given a null-terminated string of wchar_t with each wchar_t representing |
20 // a UTF-16 code unit, returns a string16 made up of wchar_t's in the input. | 14 // a UTF-16 code unit, returns a string16 made up of wchar_t's in the input. |
21 // Each wchar_t should be <= 0xFFFF and a non-BMP character (> U+FFFF) | 15 // Each wchar_t should be <= 0xFFFF and a non-BMP character (> U+FFFF) |
22 // should be represented as a surrogate pair (two UTF-16 units) | 16 // should be represented as a surrogate pair (two UTF-16 units) |
23 // *even* where wchar_t is 32-bit (Linux and Mac). | 17 // *even* where wchar_t is 32-bit (Linux and Mac). |
24 // | 18 // |
25 // This is to help write tests for functions with string16 params until | 19 // This is to help write tests for functions with string16 params until |
26 // the C++ 0x UTF-16 literal is well-supported by compilers. | 20 // the C++ 0x UTF-16 literal is well-supported by compilers. |
27 string16 BuildString16(const wchar_t* s) { | 21 string16 BuildString16(const wchar_t* s) { |
28 #if defined(WCHAR_T_IS_UTF16) | 22 #if defined(WCHAR_T_IS_UTF16) |
29 return string16(s); | 23 return string16(s); |
30 #elif defined(WCHAR_T_IS_UTF32) | 24 #elif defined(WCHAR_T_IS_UTF32) |
31 string16 u16; | 25 string16 u16; |
32 while (*s != 0) { | 26 while (*s != 0) { |
33 DCHECK(static_cast<unsigned int>(*s) <= 0xFFFFu); | 27 DCHECK(static_cast<unsigned int>(*s) <= 0xFFFFu); |
34 u16.push_back(*s++); | 28 u16.push_back(*s++); |
35 } | 29 } |
36 return u16; | 30 return u16; |
37 #endif | 31 #endif |
38 } | 32 } |
39 | 33 |
40 } // namespace | 34 const wchar_t* const kConvertRoundtripCases[] = { |
41 | |
42 static const struct trim_case { | |
43 const wchar_t* input; | |
44 const TrimPositions positions; | |
45 const wchar_t* output; | |
46 const TrimPositions return_value; | |
47 } trim_cases[] = { | |
48 {L" Google Video ", TRIM_LEADING, L"Google Video ", TRIM_LEADING}, | |
49 {L" Google Video ", TRIM_TRAILING, L" Google Video", TRIM_TRAILING}, | |
50 {L" Google Video ", TRIM_ALL, L"Google Video", TRIM_ALL}, | |
51 {L"Google Video", TRIM_ALL, L"Google Video", TRIM_NONE}, | |
52 {L"", TRIM_ALL, L"", TRIM_NONE}, | |
53 {L" ", TRIM_LEADING, L"", TRIM_LEADING}, | |
54 {L" ", TRIM_TRAILING, L"", TRIM_TRAILING}, | |
55 {L" ", TRIM_ALL, L"", TRIM_ALL}, | |
56 {L"\t\rTest String\n", TRIM_ALL, L"Test String", TRIM_ALL}, | |
57 {L"\x2002Test String\x00A0\x3000", TRIM_ALL, L"Test String", TRIM_ALL}, | |
58 }; | |
59 | |
60 static const struct trim_case_ascii { | |
61 const char* input; | |
62 const TrimPositions positions; | |
63 const char* output; | |
64 const TrimPositions return_value; | |
65 } trim_cases_ascii[] = { | |
66 {" Google Video ", TRIM_LEADING, "Google Video ", TRIM_LEADING}, | |
67 {" Google Video ", TRIM_TRAILING, " Google Video", TRIM_TRAILING}, | |
68 {" Google Video ", TRIM_ALL, "Google Video", TRIM_ALL}, | |
69 {"Google Video", TRIM_ALL, "Google Video", TRIM_NONE}, | |
70 {"", TRIM_ALL, "", TRIM_NONE}, | |
71 {" ", TRIM_LEADING, "", TRIM_LEADING}, | |
72 {" ", TRIM_TRAILING, "", TRIM_TRAILING}, | |
73 {" ", TRIM_ALL, "", TRIM_ALL}, | |
74 {"\t\rTest String\n", TRIM_ALL, "Test String", TRIM_ALL}, | |
75 }; | |
76 | |
77 TEST(StringUtilTest, TrimWhitespace) { | |
78 std::wstring output; // Allow contents to carry over to next testcase | |
79 for (size_t i = 0; i < arraysize(trim_cases); ++i) { | |
80 const trim_case& value = trim_cases[i]; | |
81 EXPECT_EQ(value.return_value, | |
82 TrimWhitespace(value.input, value.positions, &output)); | |
83 EXPECT_EQ(value.output, output); | |
84 } | |
85 | |
86 // Test that TrimWhitespace() can take the same string for input and output | |
87 output = L" This is a test \r\n"; | |
88 EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output)); | |
89 EXPECT_EQ(L"This is a test", output); | |
90 | |
91 // Once more, but with a string of whitespace | |
92 output = L" \r\n"; | |
93 EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output)); | |
94 EXPECT_EQ(L"", output); | |
95 | |
96 std::string output_ascii; | |
97 for (size_t i = 0; i < arraysize(trim_cases_ascii); ++i) { | |
98 const trim_case_ascii& value = trim_cases_ascii[i]; | |
99 EXPECT_EQ(value.return_value, | |
100 TrimWhitespace(value.input, value.positions, &output_ascii)); | |
101 EXPECT_EQ(value.output, output_ascii); | |
102 } | |
103 } | |
104 | |
105 static const struct collapse_case { | |
106 const wchar_t* input; | |
107 const bool trim; | |
108 const wchar_t* output; | |
109 } collapse_cases[] = { | |
110 {L" Google Video ", false, L"Google Video"}, | |
111 {L"Google Video", false, L"Google Video"}, | |
112 {L"", false, L""}, | |
113 {L" ", false, L""}, | |
114 {L"\t\rTest String\n", false, L"Test String"}, | |
115 {L"\x2002Test String\x00A0\x3000", false, L"Test String"}, | |
116 {L" Test \n \t String ", false, L"Test String"}, | |
117 {L"\x2002Test\x1680 \x2028 \tString\x00A0\x3000", false, L"Test String"}, | |
118 {L" Test String", false, L"Test String"}, | |
119 {L"Test String ", false, L"Test String"}, | |
120 {L"Test String", false, L"Test String"}, | |
121 {L"", true, L""}, | |
122 {L"\n", true, L""}, | |
123 {L" \r ", true, L""}, | |
124 {L"\nFoo", true, L"Foo"}, | |
125 {L"\r Foo ", true, L"Foo"}, | |
126 {L" Foo bar ", true, L"Foo bar"}, | |
127 {L" \tFoo bar \n", true, L"Foo bar"}, | |
128 {L" a \r b\n c \r\n d \t\re \t f \n ", true, L"abcde f"}, | |
129 }; | |
130 | |
131 TEST(StringUtilTest, CollapseWhitespace) { | |
132 for (size_t i = 0; i < arraysize(collapse_cases); ++i) { | |
133 const collapse_case& value = collapse_cases[i]; | |
134 EXPECT_EQ(value.output, CollapseWhitespace(value.input, value.trim)); | |
135 } | |
136 } | |
137 | |
138 static const struct collapse_case_ascii { | |
139 const char* input; | |
140 const bool trim; | |
141 const char* output; | |
142 } collapse_cases_ascii[] = { | |
143 {" Google Video ", false, "Google Video"}, | |
144 {"Google Video", false, "Google Video"}, | |
145 {"", false, ""}, | |
146 {" ", false, ""}, | |
147 {"\t\rTest String\n", false, "Test String"}, | |
148 {" Test \n \t String ", false, "Test String"}, | |
149 {" Test String", false, "Test String"}, | |
150 {"Test String ", false, "Test String"}, | |
151 {"Test String", false, "Test String"}, | |
152 {"", true, ""}, | |
153 {"\n", true, ""}, | |
154 {" \r ", true, ""}, | |
155 {"\nFoo", true, "Foo"}, | |
156 {"\r Foo ", true, "Foo"}, | |
157 {" Foo bar ", true, "Foo bar"}, | |
158 {" \tFoo bar \n", true, "Foo bar"}, | |
159 {" a \r b\n c \r\n d \t\re \t f \n ", true, "abcde f"}, | |
160 }; | |
161 | |
162 TEST(StringUtilTest, CollapseWhitespaceASCII) { | |
163 for (size_t i = 0; i < arraysize(collapse_cases_ascii); ++i) { | |
164 const collapse_case_ascii& value = collapse_cases_ascii[i]; | |
165 EXPECT_EQ(value.output, CollapseWhitespaceASCII(value.input, value.trim)); | |
166 } | |
167 } | |
168 | |
169 TEST(StringUtilTest, IsStringUTF8) { | |
170 EXPECT_TRUE(IsStringUTF8("abc")); | |
171 EXPECT_TRUE(IsStringUTF8("\xc2\x81")); | |
172 EXPECT_TRUE(IsStringUTF8("\xe1\x80\xbf")); | |
173 EXPECT_TRUE(IsStringUTF8("\xf1\x80\xa0\xbf")); | |
174 EXPECT_TRUE(IsStringUTF8("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf")); | |
175 EXPECT_TRUE(IsStringUTF8("\xef\xbb\xbf" "abc")); // UTF-8 BOM | |
176 | |
177 // surrogate code points | |
178 EXPECT_FALSE(IsStringUTF8("\xed\xa0\x80\xed\xbf\xbf")); | |
179 EXPECT_FALSE(IsStringUTF8("\xed\xa0\x8f")); | |
180 EXPECT_FALSE(IsStringUTF8("\xed\xbf\xbf")); | |
181 | |
182 // overlong sequences | |
183 EXPECT_FALSE(IsStringUTF8("\xc0\x80")); // U+0000 | |
184 EXPECT_FALSE(IsStringUTF8("\xc1\x80\xc1\x81")); // "AB" | |
185 EXPECT_FALSE(IsStringUTF8("\xe0\x80\x80")); // U+0000 | |
186 EXPECT_FALSE(IsStringUTF8("\xe0\x82\x80")); // U+0080 | |
187 EXPECT_FALSE(IsStringUTF8("\xe0\x9f\xbf")); // U+07ff | |
188 EXPECT_FALSE(IsStringUTF8("\xf0\x80\x80\x8D")); // U+000D | |
189 EXPECT_FALSE(IsStringUTF8("\xf0\x80\x82\x91")); // U+0091 | |
190 EXPECT_FALSE(IsStringUTF8("\xf0\x80\xa0\x80")); // U+0800 | |
191 EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbb\xbf")); // U+FEFF (BOM) | |
192 EXPECT_FALSE(IsStringUTF8("\xf8\x80\x80\x80\xbf")); // U+003F | |
193 EXPECT_FALSE(IsStringUTF8("\xfc\x80\x80\x80\xa0\xa5")); // U+00A5 | |
194 | |
195 // Beyond U+10FFFF (the upper limit of Unicode codespace) | |
196 EXPECT_FALSE(IsStringUTF8("\xf4\x90\x80\x80")); // U+110000 | |
197 EXPECT_FALSE(IsStringUTF8("\xf8\xa0\xbf\x80\xbf")); // 5 bytes | |
198 EXPECT_FALSE(IsStringUTF8("\xfc\x9c\xbf\x80\xbf\x80")); // 6 bytes | |
199 | |
200 // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE) | |
201 EXPECT_FALSE(IsStringUTF8("\xfe\xff")); | |
202 EXPECT_FALSE(IsStringUTF8("\xff\xfe")); | |
203 EXPECT_FALSE(IsStringUTF8(std::string("\x00\x00\xfe\xff", 4))); | |
204 EXPECT_FALSE(IsStringUTF8("\xff\xfe\x00\x00")); | |
205 | |
206 // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF> | |
207 EXPECT_FALSE(IsStringUTF8("\xef\xbf\xbe")); // U+FFFE | |
208 EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbf\xbe")); // intended U+1FFFE, but these bytes are an overlong U+FFFE (U+1FFFE is \xf0\x9f\xbf\xbe) | |
209 EXPECT_FALSE(IsStringUTF8("\xf3\xbf\xbf\xbf")); // intended U+10FFFF, but these bytes encode U+FFFFF (U+10FFFF is \xf4\x8f\xbf\xbf) | |
210 | |
211 // This should also be false, but currently we pass them through. | |
212 // Disable them for now. | |
213 #if 0 | |
214 EXPECT_FALSE(IsStringUTF8("\xef\xb7\x90")); // U+FDD0 | |
215 EXPECT_FALSE(IsStringUTF8("\xef\xb7\xaf")); // U+FDEF | |
216 #endif | |
217 | |
218 // Strings in legacy encodings. We can certainly make up strings | |
219 // in a legacy encoding that are valid in UTF-8, but in real data, | |
220 // most of them are invalid as UTF-8. | |
221 EXPECT_FALSE(IsStringUTF8("caf\xe9")); // cafe with U+00E9 in ISO-8859-1 | |
222 EXPECT_FALSE(IsStringUTF8("\xb0\xa1\xb0\xa2")); // U+AC00, U+AC01 in EUC-KR | |
223 EXPECT_FALSE(IsStringUTF8("\xa7\x41\xa6\x6e")); // U+4F60 U+597D in Big5 | |
224 // "abc" with U+201[CD] in windows-125[0-8] | |
225 EXPECT_FALSE(IsStringUTF8("\x93" "abc\x94")); | |
226 // U+0639 U+064E U+0644 U+064E in ISO-8859-6 | |
227 EXPECT_FALSE(IsStringUTF8("\xd9\xee\xe4\xee")); | |
228 // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7 | |
229 EXPECT_FALSE(IsStringUTF8("\xe3\xe5\xe9\xdC")); | |
230 } | |
231 | |
232 static const wchar_t* const kConvertRoundtripCases[] = { | |
233 L"Google Video", | 35 L"Google Video", |
234 // "网页 图片 资讯更多 »" | 36 // "网页 图片 资讯更多 »" |
235 L"\x7f51\x9875\x0020\x56fe\x7247\x0020\x8d44\x8baf\x66f4\x591a\x0020\x00bb", | 37 L"\x7f51\x9875\x0020\x56fe\x7247\x0020\x8d44\x8baf\x66f4\x591a\x0020\x00bb", |
236 // "Παγκόσμιος Ιστός" | 38 // "Παγκόσμιος Ιστός" |
237 L"\x03a0\x03b1\x03b3\x03ba\x03cc\x03c3\x03bc\x03b9" | 39 L"\x03a0\x03b1\x03b3\x03ba\x03cc\x03c3\x03bc\x03b9" |
238 L"\x03bf\x03c2\x0020\x0399\x03c3\x03c4\x03cc\x03c2", | 40 L"\x03bf\x03c2\x0020\x0399\x03c3\x03c4\x03cc\x03c2", |
239 // "Поиск страниц на русском" | 41 // "Поиск страниц на русском" |
240 L"\x041f\x043e\x0438\x0441\x043a\x0020\x0441\x0442" | 42 L"\x041f\x043e\x0438\x0441\x043a\x0020\x0441\x0442" |
241 L"\x0440\x0430\x043d\x0438\x0446\x0020\x043d\x0430" | 43 L"\x0440\x0430\x043d\x0438\x0446\x0020\x043d\x0430" |
242 L"\x0020\x0440\x0443\x0441\x0441\x043a\x043e\x043c", | 44 L"\x0020\x0440\x0443\x0441\x0441\x043a\x043e\x043c", |
243 // "전체서비스" | 45 // "전체서비스" |
244 L"\xc804\xccb4\xc11c\xbe44\xc2a4", | 46 L"\xc804\xccb4\xc11c\xbe44\xc2a4", |
245 | 47 |
246 // Test characters that take more than 16 bits. This will depend on whether | 48 // Test characters that take more than 16 bits. This will depend on whether |
247 // wchar_t is 16 or 32 bits. | 49 // wchar_t is 16 or 32 bits. |
248 #if defined(WCHAR_T_IS_UTF16) | 50 #if defined(WCHAR_T_IS_UTF16) |
249 L"\xd800\xdf00", | 51 L"\xd800\xdf00", |
250 // ????? (Mathematical Alphanumeric Symbols (U+011d40 - U+011d44 : A,B,C,D,E) | 52 // ????? (Mathematical Alphanumeric Symbols (U+011d40 - U+011d44 : A,B,C,D,E) |
251 L"\xd807\xdd40\xd807\xdd41\xd807\xdd42\xd807\xdd43\xd807\xdd44", | 53 L"\xd807\xdd40\xd807\xdd41\xd807\xdd42\xd807\xdd43\xd807\xdd44", |
252 #elif defined(WCHAR_T_IS_UTF32) | 54 #elif defined(WCHAR_T_IS_UTF32) |
253 L"\x10300", | 55 L"\x10300", |
254 // ????? (Mathematical Alphanumeric Symbols (U+011d40 - U+011d44 : A,B,C,D,E) | 56 // ????? (Mathematical Alphanumeric Symbols (U+011d40 - U+011d44 : A,B,C,D,E) |
255 L"\x11d40\x11d41\x11d42\x11d43\x11d44", | 57 L"\x11d40\x11d41\x11d42\x11d43\x11d44", |
256 #endif | 58 #endif |
257 }; | 59 }; |
258 | 60 |
259 TEST(StringUtilTest, ConvertUTF8AndWide) { | 61 } // namespace |
| 62 |
| 63 TEST(UTFStringConversionsTest, ConvertUTF8AndWide) { |
260 // we round-trip all the wide strings through UTF-8 to make sure everything | 64 // we round-trip all the wide strings through UTF-8 to make sure everything |
261 // agrees on the conversion. This uses the stream operators to test them | 65 // agrees on the conversion. This uses the stream operators to test them |
262 // simultaneously. | 66 // simultaneously. |
263 for (size_t i = 0; i < arraysize(kConvertRoundtripCases); ++i) { | 67 for (size_t i = 0; i < arraysize(kConvertRoundtripCases); ++i) { |
264 std::ostringstream utf8; | 68 std::ostringstream utf8; |
265 utf8 << WideToUTF8(kConvertRoundtripCases[i]); | 69 utf8 << WideToUTF8(kConvertRoundtripCases[i]); |
266 std::wostringstream wide; | 70 std::wostringstream wide; |
267 wide << UTF8ToWide(utf8.str()); | 71 wide << UTF8ToWide(utf8.str()); |
268 | 72 |
269 EXPECT_EQ(kConvertRoundtripCases[i], wide.str()); | 73 EXPECT_EQ(kConvertRoundtripCases[i], wide.str()); |
270 } | 74 } |
271 } | 75 } |
272 | 76 |
273 TEST(StringUtilTest, ConvertUTF8AndWideEmptyString) { | 77 TEST(UTFStringConversionsTest, ConvertUTF8AndWideEmptyString) { |
274 // An empty std::wstring should be converted to an empty std::string, | 78 // An empty std::wstring should be converted to an empty std::string, |
275 // and vice versa. | 79 // and vice versa. |
276 std::wstring wempty; | 80 std::wstring wempty; |
277 std::string empty; | 81 std::string empty; |
278 EXPECT_EQ(empty, WideToUTF8(wempty)); | 82 EXPECT_EQ(empty, WideToUTF8(wempty)); |
279 EXPECT_EQ(wempty, UTF8ToWide(empty)); | 83 EXPECT_EQ(wempty, UTF8ToWide(empty)); |
280 } | 84 } |
281 | 85 |
282 TEST(StringUtilTest, ConvertUTF8ToWide) { | 86 TEST(UTFStringConversionsTest, ConvertUTF8ToWide) { |
283 struct UTF8ToWideCase { | 87 struct UTF8ToWideCase { |
284 const char* utf8; | 88 const char* utf8; |
285 const wchar_t* wide; | 89 const wchar_t* wide; |
286 bool success; | 90 bool success; |
287 } convert_cases[] = { | 91 } convert_cases[] = { |
288 // Regular UTF-8 input. | 92 // Regular UTF-8 input. |
289 {"\xe4\xbd\xa0\xe5\xa5\xbd", L"\x4f60\x597d", true}, | 93 {"\xe4\xbd\xa0\xe5\xa5\xbd", L"\x4f60\x597d", true}, |
290 // Non-character is passed through. | 94 // Non-character is passed through. |
291 {"\xef\xbf\xbfHello", L"\xffffHello", true}, | 95 {"\xef\xbf\xbfHello", L"\xffffHello", true}, |
292 // Truncated UTF-8 sequence. | 96 // Truncated UTF-8 sequence. |
(...skipping 22 matching lines...) Expand all Loading... |
315 strlen(convert_cases[i].utf8), | 119 strlen(convert_cases[i].utf8), |
316 &converted)); | 120 &converted)); |
317 std::wstring expected(convert_cases[i].wide); | 121 std::wstring expected(convert_cases[i].wide); |
318 EXPECT_EQ(expected, converted); | 122 EXPECT_EQ(expected, converted); |
319 } | 123 } |
320 | 124 |
321 // Manually test an embedded NULL. | 125 // Manually test an embedded NULL. |
322 std::wstring converted; | 126 std::wstring converted; |
323 EXPECT_TRUE(UTF8ToWide("\00Z\t", 3, &converted)); | 127 EXPECT_TRUE(UTF8ToWide("\00Z\t", 3, &converted)); |
324 ASSERT_EQ(3U, converted.length()); | 128 ASSERT_EQ(3U, converted.length()); |
325 #if defined(WCHAR_T_IS_UNSIGNED) | 129 EXPECT_EQ(static_cast<wchar_t>(0), converted[0]); |
326 EXPECT_EQ(0U, converted[0]); | |
327 #else | |
328 EXPECT_EQ(0, converted[0]); | |
329 #endif | |
330 EXPECT_EQ('Z', converted[1]); | 130 EXPECT_EQ('Z', converted[1]); |
331 EXPECT_EQ('\t', converted[2]); | 131 EXPECT_EQ('\t', converted[2]); |
332 | 132 |
333 // Make sure that conversion replaces, not appends. | 133 // Make sure that conversion replaces, not appends. |
334 EXPECT_TRUE(UTF8ToWide("B", 1, &converted)); | 134 EXPECT_TRUE(UTF8ToWide("B", 1, &converted)); |
335 ASSERT_EQ(1U, converted.length()); | 135 ASSERT_EQ(1U, converted.length()); |
336 EXPECT_EQ('B', converted[0]); | 136 EXPECT_EQ('B', converted[0]); |
337 } | 137 } |
338 | 138 |
339 #if defined(WCHAR_T_IS_UTF16) | 139 #if defined(WCHAR_T_IS_UTF16) |
340 // This test is only valid when wchar_t == UTF-16. | 140 // This test is only valid when wchar_t == UTF-16. |
341 TEST(StringUtilTest, ConvertUTF16ToUTF8) { | 141 TEST(UTFStringConversionsTest, ConvertUTF16ToUTF8) { |
342 struct UTF16ToUTF8Case { | 142 struct WideToUTF8Case { |
343 const wchar_t* utf16; | 143 const wchar_t* utf16; |
344 const char* utf8; | 144 const char* utf8; |
345 bool success; | 145 bool success; |
346 } convert_cases[] = { | 146 } convert_cases[] = { |
347 // Regular UTF-16 input. | 147 // Regular UTF-16 input. |
348 {L"\x4f60\x597d", "\xe4\xbd\xa0\xe5\xa5\xbd", true}, | 148 {L"\x4f60\x597d", "\xe4\xbd\xa0\xe5\xa5\xbd", true}, |
349 // Test a non-BMP character. | 149 // Test a non-BMP character. |
350 {L"\xd800\xdf00", "\xF0\x90\x8C\x80", true}, | 150 {L"\xd800\xdf00", "\xF0\x90\x8C\x80", true}, |
351 // Non-characters are passed through. | 151 // Non-characters are passed through. |
352 {L"\xffffHello", "\xEF\xBF\xBFHello", true}, | 152 {L"\xffffHello", "\xEF\xBF\xBFHello", true}, |
(...skipping 10 matching lines...) Expand all Loading... |
363 WideToUTF8(convert_cases[i].utf16, | 163 WideToUTF8(convert_cases[i].utf16, |
364 wcslen(convert_cases[i].utf16), | 164 wcslen(convert_cases[i].utf16), |
365 &converted)); | 165 &converted)); |
366 std::string expected(convert_cases[i].utf8); | 166 std::string expected(convert_cases[i].utf8); |
367 EXPECT_EQ(expected, converted); | 167 EXPECT_EQ(expected, converted); |
368 } | 168 } |
369 } | 169 } |
370 | 170 |
371 #elif defined(WCHAR_T_IS_UTF32) | 171 #elif defined(WCHAR_T_IS_UTF32) |
372 // This test is only valid when wchar_t == UTF-32. | 172 // This test is only valid when wchar_t == UTF-32. |
373 TEST(StringUtilTest, ConvertUTF32ToUTF8) { | 173 TEST(UTFStringConversionsTest, ConvertUTF32ToUTF8) { |
374 struct WideToUTF8Case { | 174 struct WideToUTF8Case { |
375 const wchar_t* utf32; | 175 const wchar_t* utf32; |
376 const char* utf8; | 176 const char* utf8; |
377 bool success; | 177 bool success; |
378 } convert_cases[] = { | 178 } convert_cases[] = { |
379 // Regular 16-bit input. | 179 // Regular 16-bit input. |
380 {L"\x4f60\x597d", "\xe4\xbd\xa0\xe5\xa5\xbd", true}, | 180 {L"\x4f60\x597d", "\xe4\xbd\xa0\xe5\xa5\xbd", true}, |
381 // Test a non-BMP character. | 181 // Test a non-BMP character. |
382 {L"A\x10300z", "A\xF0\x90\x8C\x80z", true}, | 182 {L"A\x10300z", "A\xF0\x90\x8C\x80z", true}, |
383 // Non-characters are passed through. | 183 // Non-characters are passed through. |
(...skipping 11 matching lines...) Expand all Loading... |
395 EXPECT_EQ(convert_cases[i].success, | 195 EXPECT_EQ(convert_cases[i].success, |
396 WideToUTF8(convert_cases[i].utf32, | 196 WideToUTF8(convert_cases[i].utf32, |
397 wcslen(convert_cases[i].utf32), | 197 wcslen(convert_cases[i].utf32), |
398 &converted)); | 198 &converted)); |
399 std::string expected(convert_cases[i].utf8); | 199 std::string expected(convert_cases[i].utf8); |
400 EXPECT_EQ(expected, converted); | 200 EXPECT_EQ(expected, converted); |
401 } | 201 } |
402 } | 202 } |
403 #endif // defined(WCHAR_T_IS_UTF32) | 203 #endif // defined(WCHAR_T_IS_UTF32) |
404 | 204 |
405 TEST(StringUtilTest, ConvertMultiString) { | 205 TEST(UTFStringConversionsTest, ConvertMultiString) { |
406 static wchar_t wmulti[] = { | 206 static wchar_t wmulti[] = { |
407 L'f', L'o', L'o', L'\0', | 207 L'f', L'o', L'o', L'\0', |
408 L'b', L'a', L'r', L'\0', | 208 L'b', L'a', L'r', L'\0', |
409 L'b', L'a', L'z', L'\0', | 209 L'b', L'a', L'z', L'\0', |
410 L'\0' | 210 L'\0' |
411 }; | 211 }; |
412 static char multi[] = { | 212 static char multi[] = { |
413 'f', 'o', 'o', '\0', | 213 'f', 'o', 'o', '\0', |
414 'b', 'a', 'r', '\0', | 214 'b', 'a', 'r', '\0', |
415 'b', 'a', 'z', '\0', | 215 'b', 'a', 'z', '\0', |
416 '\0' | 216 '\0' |
417 }; | 217 }; |
418 std::wstring wmultistring; | 218 std::wstring wmultistring; |
419 memcpy(WriteInto(&wmultistring, arraysize(wmulti)), wmulti, sizeof(wmulti)); | 219 memcpy(WriteInto(&wmultistring, arraysize(wmulti)), wmulti, sizeof(wmulti)); |
420 EXPECT_EQ(arraysize(wmulti) - 1, wmultistring.length()); | 220 EXPECT_EQ(arraysize(wmulti) - 1, wmultistring.length()); |
421 std::string expected; | 221 std::string expected; |
422 memcpy(WriteInto(&expected, arraysize(multi)), multi, sizeof(multi)); | 222 memcpy(WriteInto(&expected, arraysize(multi)), multi, sizeof(multi)); |
423 EXPECT_EQ(arraysize(multi) - 1, expected.length()); | 223 EXPECT_EQ(arraysize(multi) - 1, expected.length()); |
424 const std::string& converted = WideToUTF8(wmultistring); | 224 const std::string& converted = WideToUTF8(wmultistring); |
425 EXPECT_EQ(arraysize(multi) - 1, converted.length()); | 225 EXPECT_EQ(arraysize(multi) - 1, converted.length()); |
426 EXPECT_EQ(expected, converted); | 226 EXPECT_EQ(expected, converted); |
427 } | 227 } |
428 | 228 |
429 TEST(StringUtilTest, ConvertASCII) { | 229 TEST(UTFStringConversionsTest, AdjustOffset) { |
430 static const char* char_cases[] = { | 230 // Under the hood, all the functions call the same converter function, so we |
431 "Google Video", | 231 // don't need to exhaustively check every case. |
432 "Hello, world\n", | 232 struct WideToUTF8Case { |
433 "0123ABCDwxyz \a\b\t\r\n!+,.~" | 233 const wchar_t* wide; |
| 234 size_t input_offset; |
| 235 size_t output_offset; |
| 236 } wide_to_utf8_cases[] = { |
| 237 {L"", 0, std::string::npos}, |
| 238 {L"\x4f60\x597d", 0, 0}, |
| 239 {L"\x4f60\x597d", 1, 3}, |
| 240 {L"\x4f60\x597d", 2, std::string::npos}, |
| 241 {L"\x4f60\x597d", std::wstring::npos, std::string::npos}, |
| 242 {L"\xd800\x597dz", 1, 0}, |
| 243 {L"\xd800\x597dz", 2, 3}, |
434 }; | 244 }; |
435 | 245 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(wide_to_utf8_cases); ++i) { |
436 static const wchar_t* const wchar_cases[] = { | 246 size_t offset = wide_to_utf8_cases[i].input_offset; |
437 L"Google Video", | 247 WideToUTF8AndAdjustOffset(wide_to_utf8_cases[i].wide, &offset); |
438 L"Hello, world\n", | 248 EXPECT_EQ(wide_to_utf8_cases[i].output_offset, offset); |
439 L"0123ABCDwxyz \a\b\t\r\n!+,.~" | |
440 }; | |
441 | |
442 for (size_t i = 0; i < arraysize(char_cases); ++i) { | |
443 EXPECT_TRUE(IsStringASCII(char_cases[i])); | |
444 std::wstring wide = ASCIIToWide(char_cases[i]); | |
445 EXPECT_EQ(wchar_cases[i], wide); | |
446 | |
447 EXPECT_TRUE(IsStringASCII(wchar_cases[i])); | |
448 std::string ascii = WideToASCII(wchar_cases[i]); | |
449 EXPECT_EQ(char_cases[i], ascii); | |
450 } | 249 } |
451 | 250 |
452 EXPECT_FALSE(IsStringASCII("Google \x80Video")); | 251 struct UTF8ToWideCase { |
453 EXPECT_FALSE(IsStringASCII(L"Google \x80Video")); | 252 const char* utf8; |
454 | 253 size_t input_offset; |
455 // Convert empty strings. | 254 size_t output_offset; |
456 std::wstring wempty; | 255 } utf8_to_wide_cases[] = { |
457 std::string empty; | 256 {"\xe4\xbd\xa0\xe5\xa5\xbd", 1, std::wstring::npos}, |
458 EXPECT_EQ(empty, WideToASCII(wempty)); | 257 {"\xe4\xbd\xa0\xe5\xa5\xbd", 3, 1}, |
459 EXPECT_EQ(wempty, ASCIIToWide(empty)); | 258 {"\xed\xb0\x80z", 3, 0}, |
460 | 259 {"A\xF0\x90\x8C\x80z", 1, 1}, |
461 // Convert strings with an embedded NUL character. | 260 {"A\xF0\x90\x8C\x80z", 2, std::wstring::npos}, |
462 const char chars_with_nul[] = "test\0string"; | 261 #if defined(WCHAR_T_IS_UTF16) |
463 const int length_with_nul = arraysize(chars_with_nul) - 1; | 262 {"A\xF0\x90\x8C\x80z", 5, 3}, |
464 std::string string_with_nul(chars_with_nul, length_with_nul); | 263 #elif defined(WCHAR_T_IS_UTF32) |
465 std::wstring wide_with_nul = ASCIIToWide(string_with_nul); | 264 {"A\xF0\x90\x8C\x80z", 5, 2}, |
466 EXPECT_EQ(static_cast<std::wstring::size_type>(length_with_nul), | |
467 wide_with_nul.length()); | |
468 std::string narrow_with_nul = WideToASCII(wide_with_nul); | |
469 EXPECT_EQ(static_cast<std::string::size_type>(length_with_nul), | |
470 narrow_with_nul.length()); | |
471 EXPECT_EQ(0, string_with_nul.compare(narrow_with_nul)); | |
472 } | |
473 | |
474 TEST(StringUtilTest, ToUpperASCII) { | |
475 EXPECT_EQ('C', ToUpperASCII('C')); | |
476 EXPECT_EQ('C', ToUpperASCII('c')); | |
477 EXPECT_EQ('2', ToUpperASCII('2')); | |
478 | |
479 EXPECT_EQ(L'C', ToUpperASCII(L'C')); | |
480 EXPECT_EQ(L'C', ToUpperASCII(L'c')); | |
481 EXPECT_EQ(L'2', ToUpperASCII(L'2')); | |
482 | |
483 std::string in_place_a("Cc2"); | |
484 StringToUpperASCII(&in_place_a); | |
485 EXPECT_EQ("CC2", in_place_a); | |
486 | |
487 std::wstring in_place_w(L"Cc2"); | |
488 StringToUpperASCII(&in_place_w); | |
489 EXPECT_EQ(L"CC2", in_place_w); | |
490 | |
491 std::string original_a("Cc2"); | |
492 std::string upper_a = StringToUpperASCII(original_a); | |
493 EXPECT_EQ("CC2", upper_a); | |
494 | |
495 std::wstring original_w(L"Cc2"); | |
496 std::wstring upper_w = StringToUpperASCII(original_w); | |
497 EXPECT_EQ(L"CC2", upper_w); | |
498 } | |
499 | |
500 static const struct { | |
501 const wchar_t* src_w; | |
502 const char* src_a; | |
503 const char* dst; | |
504 } lowercase_cases[] = { | |
505 {L"FoO", "FoO", "foo"}, | |
506 {L"foo", "foo", "foo"}, | |
507 {L"FOO", "FOO", "foo"}, | |
508 }; | |
509 | |
510 TEST(StringUtilTest, LowerCaseEqualsASCII) { | |
511 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(lowercase_cases); ++i) { | |
512 EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_w, | |
513 lowercase_cases[i].dst)); | |
514 EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_a, | |
515 lowercase_cases[i].dst)); | |
516 } | |
517 } | |
518 | |
519 TEST(StringUtilTest, GetByteDisplayUnits) { | |
520 static const struct { | |
521 int64 bytes; | |
522 DataUnits expected; | |
523 } cases[] = { | |
524 {0, DATA_UNITS_BYTE}, | |
525 {512, DATA_UNITS_BYTE}, | |
526 {10*1024, DATA_UNITS_KILOBYTE}, | |
527 {10*1024*1024, DATA_UNITS_MEGABYTE}, | |
528 {10LL*1024*1024*1024, DATA_UNITS_GIGABYTE}, | |
529 {~(1LL<<63), DATA_UNITS_GIGABYTE}, | |
530 #ifdef NDEBUG | |
531 {-1, DATA_UNITS_BYTE}, | |
532 #endif | 265 #endif |
533 }; | 266 }; |
534 | 267 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(utf8_to_wide_cases); ++i) { |
535 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) | 268 size_t offset = utf8_to_wide_cases[i].input_offset; |
536 EXPECT_EQ(cases[i].expected, GetByteDisplayUnits(cases[i].bytes)); | 269 UTF8ToWideAndAdjustOffset(utf8_to_wide_cases[i].utf8, &offset); |
537 } | 270 EXPECT_EQ(utf8_to_wide_cases[i].output_offset, offset); |
538 | |
539 TEST(StringUtilTest, FormatBytes) { | |
540 static const struct { | |
541 int64 bytes; | |
542 DataUnits units; | |
543 const wchar_t* expected; | |
544 const wchar_t* expected_with_units; | |
545 } cases[] = { | |
546 {0, DATA_UNITS_BYTE, L"0", L"0 B"}, | |
547 {512, DATA_UNITS_BYTE, L"512", L"512 B"}, | |
548 {512, DATA_UNITS_KILOBYTE, L"0.5", L"0.5 kB"}, | |
549 {1024*1024, DATA_UNITS_KILOBYTE, L"1024", L"1024 kB"}, | |
550 {1024*1024, DATA_UNITS_MEGABYTE, L"1", L"1 MB"}, | |
551 {1024*1024*1024, DATA_UNITS_GIGABYTE, L"1", L"1 GB"}, | |
552 {10LL*1024*1024*1024, DATA_UNITS_GIGABYTE, L"10", L"10 GB"}, | |
553 {~(1LL<<63), DATA_UNITS_GIGABYTE, L"8589934592", L"8589934592 GB"}, | |
554 // Make sure the first digit of the fractional part works. | |
555 {1024*1024 + 103, DATA_UNITS_KILOBYTE, L"1024.1", L"1024.1 kB"}, | |
556 {1024*1024 + 205 * 1024, DATA_UNITS_MEGABYTE, L"1.2", L"1.2 MB"}, | |
557 {1024*1024*1024 + (927 * 1024*1024), DATA_UNITS_GIGABYTE, | |
558 L"1.9", L"1.9 GB"}, | |
559 {10LL*1024*1024*1024, DATA_UNITS_GIGABYTE, L"10", L"10 GB"}, | |
560 #ifdef NDEBUG | |
561 {-1, DATA_UNITS_BYTE, L"", L""}, | |
562 #endif | |
563 }; | |
564 | |
565 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) { | |
566 EXPECT_EQ(cases[i].expected, | |
567 FormatBytes(cases[i].bytes, cases[i].units, false)); | |
568 EXPECT_EQ(cases[i].expected_with_units, | |
569 FormatBytes(cases[i].bytes, cases[i].units, true)); | |
570 } | |
571 } | |
572 | |
573 TEST(StringUtilTest, ReplaceSubstringsAfterOffset) { | |
574 static const struct { | |
575 const char* str; | |
576 string16::size_type start_offset; | |
577 const char* find_this; | |
578 const char* replace_with; | |
579 const char* expected; | |
580 } cases[] = { | |
581 {"aaa", 0, "a", "b", "bbb"}, | |
582 {"abb", 0, "ab", "a", "ab"}, | |
583 {"Removing some substrings inging", 0, "ing", "", "Remov some substrs "}, | |
584 {"Not found", 0, "x", "0", "Not found"}, | |
585 {"Not found again", 5, "x", "0", "Not found again"}, | |
586 {" Making it much longer ", 0, " ", "Four score and seven years ago", | |
587 "Four score and seven years agoMakingFour score and seven years agoit" | |
588 "Four score and seven years agomuchFour score and seven years agolonger" | |
589 "Four score and seven years ago"}, | |
590 {"Invalid offset", 9999, "t", "foobar", "Invalid offset"}, | |
591 {"Replace me only me once", 9, "me ", "", "Replace me only once"}, | |
592 {"abababab", 2, "ab", "c", "abccc"}, | |
593 }; | |
594 | |
595 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); i++) { | |
596 string16 str = ASCIIToUTF16(cases[i].str); | |
597 ReplaceSubstringsAfterOffset(&str, cases[i].start_offset, | |
598 ASCIIToUTF16(cases[i].find_this), | |
599 ASCIIToUTF16(cases[i].replace_with)); | |
600 EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str); | |
601 } | |
602 } | |
603 | |
604 TEST(StringUtilTest, ReplaceFirstSubstringAfterOffset) { | |
605 static const struct { | |
606 const char* str; | |
607 string16::size_type start_offset; | |
608 const char* find_this; | |
609 const char* replace_with; | |
610 const char* expected; | |
611 } cases[] = { | |
612 {"aaa", 0, "a", "b", "baa"}, | |
613 {"abb", 0, "ab", "a", "ab"}, | |
614 {"Removing some substrings inging", 0, "ing", "", | |
615 "Remov some substrings inging"}, | |
616 {"Not found", 0, "x", "0", "Not found"}, | |
617 {"Not found again", 5, "x", "0", "Not found again"}, | |
618 {" Making it much longer ", 0, " ", "Four score and seven years ago", | |
619 "Four score and seven years agoMaking it much longer "}, | |
620 {"Invalid offset", 9999, "t", "foobar", "Invalid offset"}, | |
621 {"Replace me only me once", 4, "me ", "", "Replace only me once"}, | |
622 {"abababab", 2, "ab", "c", "abcabab"}, | |
623 }; | |
624 | |
625 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); i++) { | |
626 string16 str = ASCIIToUTF16(cases[i].str); | |
627 ReplaceFirstSubstringAfterOffset(&str, cases[i].start_offset, | |
628 ASCIIToUTF16(cases[i].find_this), | |
629 ASCIIToUTF16(cases[i].replace_with)); | |
630 EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str); | |
631 } | |
632 } | |
633 | |
634 namespace { | |
635 | |
636 template <typename INT> | |
637 struct IntToStringTest { | |
638 INT num; | |
639 const char* sexpected; | |
640 const char* uexpected; | |
641 }; | |
642 | |
643 } | |
644 | |
645 TEST(StringUtilTest, IntToString) { | |
646 | |
647 static const IntToStringTest<int> int_tests[] = { | |
648 { 0, "0", "0" }, | |
649 { -1, "-1", "4294967295" }, | |
650 { std::numeric_limits<int>::max(), "2147483647", "2147483647" }, | |
651 { std::numeric_limits<int>::min(), "-2147483648", "2147483648" }, | |
652 }; | |
653 static const IntToStringTest<int64> int64_tests[] = { | |
654 { 0, "0", "0" }, | |
655 { -1, "-1", "18446744073709551615" }, | |
656 { std::numeric_limits<int64>::max(), | |
657 "9223372036854775807", | |
658 "9223372036854775807", }, | |
659 { std::numeric_limits<int64>::min(), | |
660 "-9223372036854775808", | |
661 "9223372036854775808" }, | |
662 }; | |
663 | |
664 for (size_t i = 0; i < arraysize(int_tests); ++i) { | |
665 const IntToStringTest<int>* test = &int_tests[i]; | |
666 EXPECT_EQ(IntToString(test->num), test->sexpected); | |
667 EXPECT_EQ(IntToWString(test->num), UTF8ToWide(test->sexpected)); | |
668 EXPECT_EQ(UintToString(test->num), test->uexpected); | |
669 EXPECT_EQ(UintToWString(test->num), UTF8ToWide(test->uexpected)); | |
670 } | |
671 for (size_t i = 0; i < arraysize(int64_tests); ++i) { | |
672 const IntToStringTest<int64>* test = &int64_tests[i]; | |
673 EXPECT_EQ(Int64ToString(test->num), test->sexpected); | |
674 EXPECT_EQ(Int64ToWString(test->num), UTF8ToWide(test->sexpected)); | |
675 EXPECT_EQ(Uint64ToString(test->num), test->uexpected); | |
676 EXPECT_EQ(Uint64ToWString(test->num), UTF8ToWide(test->uexpected)); | |
677 } | |
678 } | |
679 | |
680 TEST(StringUtilTest, Uint64ToString) { | |
681 static const struct { | |
682 uint64 input; | |
683 std::string output; | |
684 } cases[] = { | |
685 {0, "0"}, | |
686 {42, "42"}, | |
687 {INT_MAX, "2147483647"}, | |
688 {kuint64max, "18446744073709551615"}, | |
689 }; | |
690 | |
691 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) | |
692 EXPECT_EQ(cases[i].output, Uint64ToString(cases[i].input)); | |
693 } | |
694 | |
695 TEST(StringUtilTest, StringToInt) { | |
696 static const struct { | |
697 std::string input; | |
698 int output; | |
699 bool success; | |
700 } cases[] = { | |
701 {"0", 0, true}, | |
702 {"42", 42, true}, | |
703 {"-2147483648", INT_MIN, true}, | |
704 {"2147483647", INT_MAX, true}, | |
705 {"", 0, false}, | |
706 {" 42", 42, false}, | |
707 {"42 ", 42, false}, | |
708 {"\t\n\v\f\r 42", 42, false}, | |
709 {"blah42", 0, false}, | |
710 {"42blah", 42, false}, | |
711 {"blah42blah", 0, false}, | |
712 {"-273.15", -273, false}, | |
713 {"+98.6", 98, false}, | |
714 {"--123", 0, false}, | |
715 {"++123", 0, false}, | |
716 {"-+123", 0, false}, | |
717 {"+-123", 0, false}, | |
718 {"-", 0, false}, | |
719 {"-2147483649", INT_MIN, false}, | |
720 {"-99999999999", INT_MIN, false}, | |
721 {"2147483648", INT_MAX, false}, | |
722 {"99999999999", INT_MAX, false}, | |
723 }; | |
724 | |
725 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) { | |
726 EXPECT_EQ(cases[i].output, StringToInt(cases[i].input)); | |
727 int output; | |
728 EXPECT_EQ(cases[i].success, StringToInt(cases[i].input, &output)); | |
729 EXPECT_EQ(cases[i].output, output); | |
730 | |
731 std::wstring wide_input = ASCIIToWide(cases[i].input); | |
732 EXPECT_EQ(cases[i].output, StringToInt(WideToUTF16Hack(wide_input))); | |
733 EXPECT_EQ(cases[i].success, StringToInt(WideToUTF16Hack(wide_input), | |
734 &output)); | |
735 EXPECT_EQ(cases[i].output, output); | |
736 } | 271 } |
737 | 272 |
738 // One additional test to verify that conversion of numbers in strings with | 273 #if defined(WCHAR_T_IS_UTF32) |
739 // embedded NUL characters. The NUL and extra data after it should be | 274 struct WideToUTF16Case { |
740 // interpreted as junk after the number. | 275 const wchar_t* wide; |
741 const char input[] = "6\06"; | 276 size_t input_offset; |
742 std::string input_string(input, arraysize(input) - 1); | 277 size_t output_offset; |
743 int output; | 278 } wide_to_utf16_cases[] = { |
744 EXPECT_FALSE(StringToInt(input_string, &output)); | 279 {L"\x4F60\x597D", 1, 1}, |
745 EXPECT_EQ(6, output); | 280 {L"\x20000\x4E00", 1, 2}, |
746 | |
747 std::wstring wide_input = ASCIIToWide(input_string); | |
748 EXPECT_FALSE(StringToInt(WideToUTF16Hack(wide_input), &output)); | |
749 EXPECT_EQ(6, output); | |
750 } | |
751 | |
752 TEST(StringUtilTest, StringToInt64) { | |
753 static const struct { | |
754 std::string input; | |
755 int64 output; | |
756 bool success; | |
757 } cases[] = { | |
758 {"0", 0, true}, | |
759 {"42", 42, true}, | |
760 {"-2147483648", INT_MIN, true}, | |
761 {"2147483647", INT_MAX, true}, | |
762 {"-2147483649", GG_INT64_C(-2147483649), true}, | |
763 {"-99999999999", GG_INT64_C(-99999999999), true}, | |
764 {"2147483648", GG_INT64_C(2147483648), true}, | |
765 {"99999999999", GG_INT64_C(99999999999), true}, | |
766 {"9223372036854775807", kint64max, true}, | |
767 {"-9223372036854775808", kint64min, true}, | |
768 {"09", 9, true}, | |
769 {"-09", -9, true}, | |
770 {"", 0, false}, | |
771 {" 42", 42, false}, | |
772 {"42 ", 42, false}, | |
773 {"\t\n\v\f\r 42", 42, false}, | |
774 {"blah42", 0, false}, | |
775 {"42blah", 42, false}, | |
776 {"blah42blah", 0, false}, | |
777 {"-273.15", -273, false}, | |
778 {"+98.6", 98, false}, | |
779 {"--123", 0, false}, | |
780 {"++123", 0, false}, | |
781 {"-+123", 0, false}, | |
782 {"+-123", 0, false}, | |
783 {"-", 0, false}, | |
784 {"-9223372036854775809", kint64min, false}, | |
785 {"-99999999999999999999", kint64min, false}, | |
786 {"9223372036854775808", kint64max, false}, | |
787 {"99999999999999999999", kint64max, false}, | |
788 }; | 281 }; |
789 | 282 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(wide_to_utf16_cases); ++i) { |
790 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) { | 283 size_t offset = wide_to_utf16_cases[i].input_offset; |
791 EXPECT_EQ(cases[i].output, StringToInt64(cases[i].input)); | 284 WideToUTF16AndAdjustOffset(wide_to_utf16_cases[i].wide, &offset); |
792 int64 output; | 285 EXPECT_EQ(wide_to_utf16_cases[i].output_offset, offset); |
793 EXPECT_EQ(cases[i].success, StringToInt64(cases[i].input, &output)); | |
794 EXPECT_EQ(cases[i].output, output); | |
795 | |
796 std::wstring wide_input = ASCIIToWide(cases[i].input); | |
797 EXPECT_EQ(cases[i].output, StringToInt64(WideToUTF16Hack(wide_input))); | |
798 EXPECT_EQ(cases[i].success, StringToInt64(WideToUTF16Hack(wide_input), | |
799 &output)); | |
800 EXPECT_EQ(cases[i].output, output); | |
801 } | 286 } |
802 | 287 |
803 // One additional test to verify that conversion of numbers in strings with | 288 struct UTF16ToWideCase { |
804 // embedded NUL characters. The NUL and extra data after it should be | 289 const wchar_t* wide; |
805 // interpreted as junk after the number. | 290 size_t input_offset; |
806 const char input[] = "6\06"; | 291 size_t output_offset; |
807 std::string input_string(input, arraysize(input) - 1); | 292 } utf16_to_wide_cases[] = { |
808 int64 output; | 293 {L"\xD840\xDC00\x4E00", 0, 0}, |
809 EXPECT_FALSE(StringToInt64(input_string, &output)); | 294 {L"\xD840\xDC00\x4E00", 1, std::wstring::npos}, |
810 EXPECT_EQ(6, output); | 295 {L"\xD840\xDC00\x4E00", 2, 1}, |
811 | |
812 std::wstring wide_input = ASCIIToWide(input_string); | |
813 EXPECT_FALSE(StringToInt64(WideToUTF16Hack(wide_input), &output)); | |
814 EXPECT_EQ(6, output); | |
815 } | |
816 | |
817 TEST(StringUtilTest, HexStringToInt) { | |
818 static const struct { | |
819 std::string input; | |
820 int output; | |
821 bool success; | |
822 } cases[] = { | |
823 {"0", 0, true}, | |
824 {"42", 66, true}, | |
825 {"-42", -66, true}, | |
826 {"+42", 66, true}, | |
827 {"7fffffff", INT_MAX, true}, | |
828 {"80000000", INT_MIN, true}, | |
829 {"ffffffff", -1, true}, | |
830 {"DeadBeef", 0xdeadbeef, true}, | |
831 {"0x42", 66, true}, | |
832 {"-0x42", -66, true}, | |
833 {"+0x42", 66, true}, | |
834 {"0x7fffffff", INT_MAX, true}, | |
835 {"0x80000000", INT_MIN, true}, | |
836 {"0xffffffff", -1, true}, | |
837 {"0XDeadBeef", 0xdeadbeef, true}, | |
838 {"0x0f", 15, true}, | |
839 {"0f", 15, true}, | |
840 {" 45", 0x45, false}, | |
841 {"\t\n\v\f\r 0x45", 0x45, false}, | |
842 {" 45", 0x45, false}, | |
843 {"45 ", 0x45, false}, | |
844 {"efgh", 0xef, false}, | |
845 {"0xefgh", 0xef, false}, | |
846 {"hgfe", 0, false}, | |
847 {"100000000", -1, false}, // don't care about |output|, just |success| | |
848 {"-", 0, false}, | |
849 {"", 0, false}, | |
850 }; | 296 }; |
851 | 297 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(utf16_to_wide_cases); ++i) { |
852 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) { | 298 size_t offset = utf16_to_wide_cases[i].input_offset; |
853 EXPECT_EQ(cases[i].output, HexStringToInt(cases[i].input)); | 299 UTF16ToWideAndAdjustOffset(BuildString16(utf16_to_wide_cases[i].wide), |
854 int output; | 300 &offset); |
855 EXPECT_EQ(cases[i].success, HexStringToInt(cases[i].input, &output)); | 301 EXPECT_EQ(utf16_to_wide_cases[i].output_offset, offset); |
856 EXPECT_EQ(cases[i].output, output); | |
857 | |
858 std::wstring wide_input = ASCIIToWide(cases[i].input); | |
859 EXPECT_EQ(cases[i].output, HexStringToInt(WideToUTF16Hack(wide_input))); | |
860 EXPECT_EQ(cases[i].success, HexStringToInt(WideToUTF16Hack(wide_input), | |
861 &output)); | |
862 EXPECT_EQ(cases[i].output, output); | |
863 } | 302 } |
864 // One additional test to verify that conversion of numbers in strings with | |
865 // embedded NUL characters. The NUL and extra data after it should be | |
866 // interpreted as junk after the number. | |
867 const char input[] = "0xc0ffee\09"; | |
868 std::string input_string(input, arraysize(input) - 1); | |
869 int output; | |
870 EXPECT_FALSE(HexStringToInt(input_string, &output)); | |
871 EXPECT_EQ(0xc0ffee, output); | |
872 | |
873 std::wstring wide_input = ASCIIToWide(input_string); | |
874 EXPECT_FALSE(HexStringToInt(WideToUTF16Hack(wide_input), &output)); | |
875 EXPECT_EQ(0xc0ffee, output); | |
876 } | |
877 | |
878 TEST(StringUtilTest, HexStringToBytes) { | |
879 static const struct { | |
880 const std::string input; | |
881 const char* output; | |
882 size_t output_len; | |
883 bool success; | |
884 } cases[] = { | |
885 {"0", "", 0, false}, // odd number of characters fails | |
886 {"00", "\0", 1, true}, | |
887 {"42", "\x42", 1, true}, | |
888 {"-42", "", 0, false}, // any non-hex value fails | |
889 {"+42", "", 0, false}, | |
890 {"7fffffff", "\x7f\xff\xff\xff", 4, true}, | |
891 {"80000000", "\x80\0\0\0", 4, true}, | |
892 {"deadbeef", "\xde\xad\xbe\xef", 4, true}, | |
893 {"DeadBeef", "\xde\xad\xbe\xef", 4, true}, | |
894 {"0x42", "", 0, false}, // leading 0x fails (x is not hex) | |
895 {"0f", "\xf", 1, true}, | |
896 {"45 ", "\x45", 1, false}, | |
897 {"efgh", "\xef", 1, false}, | |
898 {"", "", 0, false}, | |
899 {"0123456789ABCDEF", "\x01\x23\x45\x67\x89\xAB\xCD\xEF", 8, true}, | |
900 {"0123456789ABCDEF012345", | |
901 "\x01\x23\x45\x67\x89\xAB\xCD\xEF\x01\x23\x45", 11, true}, | |
902 }; | |
903 | |
904 | |
905 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) { | |
906 std::vector<uint8> output; | |
907 std::vector<uint8> compare; | |
908 EXPECT_EQ(cases[i].success, HexStringToBytes(cases[i].input, &output)) << | |
909 i << ": " << cases[i].input; | |
910 for (size_t j = 0; j < cases[i].output_len; ++j) | |
911 compare.push_back(static_cast<uint8>(cases[i].output[j])); | |
912 ASSERT_EQ(output.size(), compare.size()) << i << ": " << cases[i].input; | |
913 EXPECT_TRUE(std::equal(output.begin(), output.end(), compare.begin())) << | |
914 i << ": " << cases[i].input; | |
915 | |
916 output.clear(); | |
917 compare.clear(); | |
918 | |
919 std::wstring wide_input = ASCIIToWide(cases[i].input); | |
920 EXPECT_EQ(cases[i].success, | |
921 HexStringToBytes(WideToUTF16Hack(wide_input), &output)) << | |
922 i << ": " << cases[i].input; | |
923 for (size_t j = 0; j < cases[i].output_len; ++j) | |
924 compare.push_back(static_cast<uint8>(cases[i].output[j])); | |
925 ASSERT_EQ(output.size(), compare.size()) << i << ": " << cases[i].input; | |
926 EXPECT_TRUE(std::equal(output.begin(), output.end(), compare.begin())) << | |
927 i << ": " << cases[i].input; | |
928 } | |
929 } | |
930 | |
931 TEST(StringUtilTest, StringToDouble) { | |
932 static const struct { | |
933 std::string input; | |
934 double output; | |
935 bool success; | |
936 } cases[] = { | |
937 {"0", 0.0, true}, | |
938 {"42", 42.0, true}, | |
939 {"-42", -42.0, true}, | |
940 {"123.45", 123.45, true}, | |
941 {"-123.45", -123.45, true}, | |
942 {"+123.45", 123.45, true}, | |
943 {"2.99792458e8", 299792458.0, true}, | |
944 {"149597870.691E+3", 149597870691.0, true}, | |
945 {"6.", 6.0, true}, | |
946 {"9e99999999999999999999", HUGE_VAL, false}, | |
947 {"-9e99999999999999999999", -HUGE_VAL, false}, | |
948 {"1e-2", 0.01, true}, | |
949 {" 1e-2", 0.01, false}, | |
950 {"1e-2 ", 0.01, false}, | |
951 {"-1E-7", -0.0000001, true}, | |
952 {"01e02", 100, true}, | |
953 {"2.3e15", 2.3e15, true}, | |
954 {"\t\n\v\f\r -123.45e2", -12345.0, false}, | |
955 {"+123 e4", 123.0, false}, | |
956 {"123e ", 123.0, false}, | |
957 {"123e", 123.0, false}, | |
958 {" 2.99", 2.99, false}, | |
959 {"1e3.4", 1000.0, false}, | |
960 {"nothing", 0.0, false}, | |
961 {"-", 0.0, false}, | |
962 {"+", 0.0, false}, | |
963 {"", 0.0, false}, | |
964 }; | |
965 | |
966 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) { | |
967 EXPECT_DOUBLE_EQ(cases[i].output, StringToDouble(cases[i].input)); | |
968 double output; | |
969 EXPECT_EQ(cases[i].success, StringToDouble(cases[i].input, &output)); | |
970 EXPECT_DOUBLE_EQ(cases[i].output, output); | |
971 | |
972 std::wstring wide_input = ASCIIToWide(cases[i].input); | |
973 EXPECT_DOUBLE_EQ(cases[i].output, | |
974 StringToDouble(WideToUTF16Hack(wide_input))); | |
975 EXPECT_EQ(cases[i].success, StringToDouble(WideToUTF16Hack(wide_input), | |
976 &output)); | |
977 EXPECT_DOUBLE_EQ(cases[i].output, output); | |
978 } | |
979 | |
980 // One additional test to verify that conversion of numbers in strings with | |
981 // embedded NUL characters. The NUL and extra data after it should be | |
982 // interpreted as junk after the number. | |
983 const char input[] = "3.14\0159"; | |
984 std::string input_string(input, arraysize(input) - 1); | |
985 double output; | |
986 EXPECT_FALSE(StringToDouble(input_string, &output)); | |
987 EXPECT_DOUBLE_EQ(3.14, output); | |
988 | |
989 std::wstring wide_input = ASCIIToWide(input_string); | |
990 EXPECT_FALSE(StringToDouble(WideToUTF16Hack(wide_input), &output)); | |
991 EXPECT_DOUBLE_EQ(3.14, output); | |
992 } | |
993 | |
994 // This checks where we can use the assignment operator for a va_list. We need | |
995 // a way to do this since Visual C doesn't support va_copy, but assignment on | |
996 // va_list is not guaranteed to be a copy. See StringAppendVT which uses this | |
997 // capability. | |
998 static void VariableArgsFunc(const char* format, ...) { | |
999 va_list org; | |
1000 va_start(org, format); | |
1001 | |
1002 va_list dup; | |
1003 GG_VA_COPY(dup, org); | |
1004 int i1 = va_arg(org, int); | |
1005 int j1 = va_arg(org, int); | |
1006 char* s1 = va_arg(org, char*); | |
1007 double d1 = va_arg(org, double); | |
1008 va_end(org); | |
1009 | |
1010 int i2 = va_arg(dup, int); | |
1011 int j2 = va_arg(dup, int); | |
1012 char* s2 = va_arg(dup, char*); | |
1013 double d2 = va_arg(dup, double); | |
1014 | |
1015 EXPECT_EQ(i1, i2); | |
1016 EXPECT_EQ(j1, j2); | |
1017 EXPECT_STREQ(s1, s2); | |
1018 EXPECT_EQ(d1, d2); | |
1019 | |
1020 va_end(dup); | |
1021 } | |
1022 | |
1023 TEST(StringUtilTest, VAList) { | |
1024 VariableArgsFunc("%d %d %s %lf", 45, 92, "This is interesting", 9.21); | |
1025 } | |
1026 | |
1027 TEST(StringUtilTest, StringPrintfEmptyFormat) { | |
1028 const char* empty = ""; | |
1029 EXPECT_EQ("", StringPrintf(empty)); | |
1030 EXPECT_EQ("", StringPrintf("%s", "")); | |
1031 } | |
1032 | |
1033 TEST(StringUtilTest, StringPrintfMisc) { | |
1034 EXPECT_EQ("123hello w", StringPrintf("%3d%2s %1c", 123, "hello", 'w')); | |
1035 EXPECT_EQ(L"123hello w", StringPrintf(L"%3d%2ls %1lc", 123, L"hello", 'w')); | |
1036 } | |
1037 | |
1038 TEST(StringUtilTest, StringAppendfStringEmptyParam) { | |
1039 std::string value("Hello"); | |
1040 StringAppendF(&value, ""); | |
1041 EXPECT_EQ("Hello", value); | |
1042 | |
1043 std::wstring valuew(L"Hello"); | |
1044 StringAppendF(&valuew, L""); | |
1045 EXPECT_EQ(L"Hello", valuew); | |
1046 } | |
1047 | |
1048 TEST(StringUtilTest, StringAppendfEmptyString) { | |
1049 std::string value("Hello"); | |
1050 StringAppendF(&value, "%s", ""); | |
1051 EXPECT_EQ("Hello", value); | |
1052 | |
1053 std::wstring valuew(L"Hello"); | |
1054 StringAppendF(&valuew, L"%ls", L""); | |
1055 EXPECT_EQ(L"Hello", valuew); | |
1056 } | |
1057 | |
1058 TEST(StringUtilTest, StringAppendfString) { | |
1059 std::string value("Hello"); | |
1060 StringAppendF(&value, " %s", "World"); | |
1061 EXPECT_EQ("Hello World", value); | |
1062 | |
1063 std::wstring valuew(L"Hello"); | |
1064 StringAppendF(&valuew, L" %ls", L"World"); | |
1065 EXPECT_EQ(L"Hello World", valuew); | |
1066 } | |
1067 | |
1068 TEST(StringUtilTest, StringAppendfInt) { | |
1069 std::string value("Hello"); | |
1070 StringAppendF(&value, " %d", 123); | |
1071 EXPECT_EQ("Hello 123", value); | |
1072 | |
1073 std::wstring valuew(L"Hello"); | |
1074 StringAppendF(&valuew, L" %d", 123); | |
1075 EXPECT_EQ(L"Hello 123", valuew); | |
1076 } | |
1077 | |
1078 // Make sure that lengths exactly around the initial buffer size are handled | |
1079 // correctly. | |
1080 TEST(StringUtilTest, StringPrintfBounds) { | |
1081 const int src_len = 1026; | |
1082 char src[src_len]; | |
1083 for (size_t i = 0; i < arraysize(src); i++) | |
1084 src[i] = 'A'; | |
1085 | |
1086 wchar_t srcw[src_len]; | |
1087 for (size_t i = 0; i < arraysize(srcw); i++) | |
1088 srcw[i] = 'A'; | |
1089 | |
1090 for (int i = 1; i < 3; i++) { | |
1091 src[src_len - i] = 0; | |
1092 std::string out; | |
1093 SStringPrintf(&out, "%s", src); | |
1094 EXPECT_STREQ(src, out.c_str()); | |
1095 | |
1096 srcw[src_len - i] = 0; | |
1097 std::wstring outw; | |
1098 SStringPrintf(&outw, L"%ls", srcw); | |
1099 EXPECT_STREQ(srcw, outw.c_str()); | |
1100 } | |
1101 } | |
1102 | |
1103 // Test very large sprintfs that will cause the buffer to grow. | |
1104 TEST(StringUtilTest, Grow) { | |
1105 char src[1026]; | |
1106 for (size_t i = 0; i < arraysize(src); i++) | |
1107 src[i] = 'A'; | |
1108 src[1025] = 0; | |
1109 | |
1110 const char* fmt = "%sB%sB%sB%sB%sB%sB%s"; | |
1111 | |
1112 std::string out; | |
1113 SStringPrintf(&out, fmt, src, src, src, src, src, src, src); | |
1114 | |
1115 char* ref = new char[320000]; | |
1116 #if defined(OS_WIN) | |
1117 sprintf_s(ref, 320000, fmt, src, src, src, src, src, src, src); | |
1118 #elif defined(OS_POSIX) | |
1119 snprintf(ref, 320000, fmt, src, src, src, src, src, src, src); | |
1120 #endif | 303 #endif |
1121 | |
1122 EXPECT_STREQ(ref, out.c_str()); | |
1123 delete[] ref; | |
1124 } | |
1125 | |
1126 // Test the boundary condition for the size of the string_util's | |
1127 // internal buffer. | |
1128 TEST(StringUtilTest, GrowBoundary) { | |
1129 const int string_util_buf_len = 1024; | |
1130 // Our buffer should be one larger than the size of StringAppendVT's stack | |
1131 // buffer. | |
1132 const int buf_len = string_util_buf_len + 1; | |
1133 char src[buf_len + 1]; // Need extra one for NULL-terminator. | |
1134 for (int i = 0; i < buf_len; ++i) | |
1135 src[i] = 'a'; | |
1136 src[buf_len] = 0; | |
1137 | |
1138 std::string out; | |
1139 SStringPrintf(&out, "%s", src); | |
1140 | |
1141 EXPECT_STREQ(src, out.c_str()); | |
1142 } | |
1143 | |
1144 // TODO(evanm): what's the proper cross-platform test here? | |
1145 #if defined(OS_WIN) | |
1146 // sprintf in Visual Studio fails when given U+FFFF. This tests that the | |
1147 // failure case is gracefuly handled. | |
1148 TEST(StringUtilTest, Invalid) { | |
1149 wchar_t invalid[2]; | |
1150 invalid[0] = 0xffff; | |
1151 invalid[1] = 0; | |
1152 | |
1153 std::wstring out; | |
1154 SStringPrintf(&out, L"%ls", invalid); | |
1155 EXPECT_STREQ(L"", out.c_str()); | |
1156 } | |
1157 #endif | |
1158 | |
1159 // Test for SplitString | |
1160 TEST(StringUtilTest, SplitString) { | |
1161 std::vector<std::wstring> r; | |
1162 | |
1163 SplitString(L"a,b,c", L',', &r); | |
1164 EXPECT_EQ(3U, r.size()); | |
1165 EXPECT_EQ(r[0], L"a"); | |
1166 EXPECT_EQ(r[1], L"b"); | |
1167 EXPECT_EQ(r[2], L"c"); | |
1168 r.clear(); | |
1169 | |
1170 SplitString(L"a, b, c", L',', &r); | |
1171 EXPECT_EQ(3U, r.size()); | |
1172 EXPECT_EQ(r[0], L"a"); | |
1173 EXPECT_EQ(r[1], L"b"); | |
1174 EXPECT_EQ(r[2], L"c"); | |
1175 r.clear(); | |
1176 | |
1177 SplitString(L"a,,c", L',', &r); | |
1178 EXPECT_EQ(3U, r.size()); | |
1179 EXPECT_EQ(r[0], L"a"); | |
1180 EXPECT_EQ(r[1], L""); | |
1181 EXPECT_EQ(r[2], L"c"); | |
1182 r.clear(); | |
1183 | |
1184 SplitString(L"", L'*', &r); | |
1185 EXPECT_EQ(1U, r.size()); | |
1186 EXPECT_EQ(r[0], L""); | |
1187 r.clear(); | |
1188 | |
1189 SplitString(L"foo", L'*', &r); | |
1190 EXPECT_EQ(1U, r.size()); | |
1191 EXPECT_EQ(r[0], L"foo"); | |
1192 r.clear(); | |
1193 | |
1194 SplitString(L"foo ,", L',', &r); | |
1195 EXPECT_EQ(2U, r.size()); | |
1196 EXPECT_EQ(r[0], L"foo"); | |
1197 EXPECT_EQ(r[1], L""); | |
1198 r.clear(); | |
1199 | |
1200 SplitString(L",", L',', &r); | |
1201 EXPECT_EQ(2U, r.size()); | |
1202 EXPECT_EQ(r[0], L""); | |
1203 EXPECT_EQ(r[1], L""); | |
1204 r.clear(); | |
1205 | |
1206 SplitString(L"\t\ta\t", L'\t', &r); | |
1207 EXPECT_EQ(4U, r.size()); | |
1208 EXPECT_EQ(r[0], L""); | |
1209 EXPECT_EQ(r[1], L""); | |
1210 EXPECT_EQ(r[2], L"a"); | |
1211 EXPECT_EQ(r[3], L""); | |
1212 r.clear(); | |
1213 | |
1214 SplitStringDontTrim(L"\t\ta\t", L'\t', &r); | |
1215 EXPECT_EQ(4U, r.size()); | |
1216 EXPECT_EQ(r[0], L""); | |
1217 EXPECT_EQ(r[1], L""); | |
1218 EXPECT_EQ(r[2], L"a"); | |
1219 EXPECT_EQ(r[3], L""); | |
1220 r.clear(); | |
1221 | |
1222 SplitString(L"\ta\t\nb\tcc", L'\n', &r); | |
1223 EXPECT_EQ(2U, r.size()); | |
1224 EXPECT_EQ(r[0], L"a"); | |
1225 EXPECT_EQ(r[1], L"b\tcc"); | |
1226 r.clear(); | |
1227 | |
1228 SplitStringDontTrim(L"\ta\t\nb\tcc", L'\n', &r); | |
1229 EXPECT_EQ(2U, r.size()); | |
1230 EXPECT_EQ(r[0], L"\ta\t"); | |
1231 EXPECT_EQ(r[1], L"b\tcc"); | |
1232 r.clear(); | |
1233 } | |
1234 | |
1235 // Test for JoinString | |
1236 TEST(StringUtilTest, JoinString) { | |
1237 std::vector<std::string> in; | |
1238 EXPECT_EQ("", JoinString(in, ',')); | |
1239 | |
1240 in.push_back("a"); | |
1241 EXPECT_EQ("a", JoinString(in, ',')); | |
1242 | |
1243 in.push_back("b"); | |
1244 in.push_back("c"); | |
1245 EXPECT_EQ("a,b,c", JoinString(in, ',')); | |
1246 | |
1247 in.push_back(""); | |
1248 EXPECT_EQ("a,b,c,", JoinString(in, ',')); | |
1249 in.push_back(" "); | |
1250 EXPECT_EQ("a|b|c|| ", JoinString(in, '|')); | |
1251 } | |
1252 | |
1253 TEST(StringUtilTest, StartsWith) { | |
1254 EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", true)); | |
1255 EXPECT_FALSE(StartsWithASCII("JavaScript:url", "javascript", true)); | |
1256 EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", false)); | |
1257 EXPECT_TRUE(StartsWithASCII("JavaScript:url", "javascript", false)); | |
1258 EXPECT_FALSE(StartsWithASCII("java", "javascript", true)); | |
1259 EXPECT_FALSE(StartsWithASCII("java", "javascript", false)); | |
1260 EXPECT_FALSE(StartsWithASCII("", "javascript", false)); | |
1261 EXPECT_FALSE(StartsWithASCII("", "javascript", true)); | |
1262 EXPECT_TRUE(StartsWithASCII("java", "", false)); | |
1263 EXPECT_TRUE(StartsWithASCII("java", "", true)); | |
1264 | |
1265 EXPECT_TRUE(StartsWith(L"javascript:url", L"javascript", true)); | |
1266 EXPECT_FALSE(StartsWith(L"JavaScript:url", L"javascript", true)); | |
1267 EXPECT_TRUE(StartsWith(L"javascript:url", L"javascript", false)); | |
1268 EXPECT_TRUE(StartsWith(L"JavaScript:url", L"javascript", false)); | |
1269 EXPECT_FALSE(StartsWith(L"java", L"javascript", true)); | |
1270 EXPECT_FALSE(StartsWith(L"java", L"javascript", false)); | |
1271 EXPECT_FALSE(StartsWith(L"", L"javascript", false)); | |
1272 EXPECT_FALSE(StartsWith(L"", L"javascript", true)); | |
1273 EXPECT_TRUE(StartsWith(L"java", L"", false)); | |
1274 EXPECT_TRUE(StartsWith(L"java", L"", true)); | |
1275 } | |
1276 | |
1277 TEST(StringUtilTest, EndsWith) { | |
1278 EXPECT_TRUE(EndsWith(L"Foo.plugin", L".plugin", true)); | |
1279 EXPECT_FALSE(EndsWith(L"Foo.Plugin", L".plugin", true)); | |
1280 EXPECT_TRUE(EndsWith(L"Foo.plugin", L".plugin", false)); | |
1281 EXPECT_TRUE(EndsWith(L"Foo.Plugin", L".plugin", false)); | |
1282 EXPECT_FALSE(EndsWith(L".plug", L".plugin", true)); | |
1283 EXPECT_FALSE(EndsWith(L".plug", L".plugin", false)); | |
1284 EXPECT_FALSE(EndsWith(L"Foo.plugin Bar", L".plugin", true)); | |
1285 EXPECT_FALSE(EndsWith(L"Foo.plugin Bar", L".plugin", false)); | |
1286 EXPECT_FALSE(EndsWith(L"", L".plugin", false)); | |
1287 EXPECT_FALSE(EndsWith(L"", L".plugin", true)); | |
1288 EXPECT_TRUE(EndsWith(L"Foo.plugin", L"", false)); | |
1289 EXPECT_TRUE(EndsWith(L"Foo.plugin", L"", true)); | |
1290 EXPECT_TRUE(EndsWith(L".plugin", L".plugin", false)); | |
1291 EXPECT_TRUE(EndsWith(L".plugin", L".plugin", true)); | |
1292 EXPECT_TRUE(EndsWith(L"", L"", false)); | |
1293 EXPECT_TRUE(EndsWith(L"", L"", true)); | |
1294 } | |
1295 | |
1296 TEST(StringUtilTest, GetStringFWithOffsets) { | |
1297 std::vector<string16> subst; | |
1298 subst.push_back(ASCIIToUTF16("1")); | |
1299 subst.push_back(ASCIIToUTF16("2")); | |
1300 std::vector<size_t> offsets; | |
1301 | |
1302 ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $1. Your number is $2."), | |
1303 subst, | |
1304 &offsets); | |
1305 EXPECT_EQ(2U, offsets.size()); | |
1306 EXPECT_EQ(7U, offsets[0]); | |
1307 EXPECT_EQ(25U, offsets[1]); | |
1308 offsets.clear(); | |
1309 | |
1310 ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $2. Your number is $1."), | |
1311 subst, | |
1312 &offsets); | |
1313 EXPECT_EQ(2U, offsets.size()); | |
1314 EXPECT_EQ(25U, offsets[0]); | |
1315 EXPECT_EQ(7U, offsets[1]); | |
1316 offsets.clear(); | |
1317 } | |
1318 | |
1319 TEST(StringUtilTest, ReplaceStringPlaceholders) { | |
1320 std::vector<string16> subst; | |
1321 subst.push_back(ASCIIToUTF16("9a")); | |
1322 subst.push_back(ASCIIToUTF16("8b")); | |
1323 subst.push_back(ASCIIToUTF16("7c")); | |
1324 subst.push_back(ASCIIToUTF16("6d")); | |
1325 subst.push_back(ASCIIToUTF16("5e")); | |
1326 subst.push_back(ASCIIToUTF16("4f")); | |
1327 subst.push_back(ASCIIToUTF16("3g")); | |
1328 subst.push_back(ASCIIToUTF16("2h")); | |
1329 subst.push_back(ASCIIToUTF16("1i")); | |
1330 | |
1331 string16 formatted = | |
1332 ReplaceStringPlaceholders( | |
1333 ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i"), subst, NULL); | |
1334 | |
1335 EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii")); | |
1336 } | |
1337 | |
1338 TEST(StringUtilTest, ReplaceStringPlaceholdersTooFew) { | |
1339 // Test whether replacestringplaceholders works as expected when there | |
1340 // are fewer inputs than outputs. | |
1341 std::vector<string16> subst; | |
1342 subst.push_back(ASCIIToUTF16("9a")); | |
1343 subst.push_back(ASCIIToUTF16("8b")); | |
1344 subst.push_back(ASCIIToUTF16("7c")); | |
1345 | |
1346 string16 formatted = | |
1347 ReplaceStringPlaceholders( | |
1348 ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$1g,$2h,$3i"), subst, NULL); | |
1349 | |
1350 EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,d,e,f,9ag,8bh,7ci")); | |
1351 } | |
1352 | |
1353 TEST(StringUtilTest, StdStringReplaceStringPlaceholders) { | |
1354 std::vector<std::string> subst; | |
1355 subst.push_back("9a"); | |
1356 subst.push_back("8b"); | |
1357 subst.push_back("7c"); | |
1358 subst.push_back("6d"); | |
1359 subst.push_back("5e"); | |
1360 subst.push_back("4f"); | |
1361 subst.push_back("3g"); | |
1362 subst.push_back("2h"); | |
1363 subst.push_back("1i"); | |
1364 | |
1365 std::string formatted = | |
1366 ReplaceStringPlaceholders( | |
1367 "$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i", subst, NULL); | |
1368 | |
1369 EXPECT_EQ(formatted, "9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii"); | |
1370 } | |
1371 | |
1372 TEST(StringUtilTest, SplitStringAlongWhitespace) { | |
1373 struct TestData { | |
1374 const std::wstring input; | |
1375 const size_t expected_result_count; | |
1376 const std::wstring output1; | |
1377 const std::wstring output2; | |
1378 } data[] = { | |
1379 { L"a", 1, L"a", L"" }, | |
1380 { L" ", 0, L"", L"" }, | |
1381 { L" a", 1, L"a", L"" }, | |
1382 { L" ab ", 1, L"ab", L"" }, | |
1383 { L" ab c", 2, L"ab", L"c" }, | |
1384 { L" ab c ", 2, L"ab", L"c" }, | |
1385 { L" ab cd", 2, L"ab", L"cd" }, | |
1386 { L" ab cd ", 2, L"ab", L"cd" }, | |
1387 { L" \ta\t", 1, L"a", L"" }, | |
1388 { L" b\ta\t", 2, L"b", L"a" }, | |
1389 { L" b\tat", 2, L"b", L"at" }, | |
1390 { L"b\tat", 2, L"b", L"at" }, | |
1391 { L"b\t at", 2, L"b", L"at" }, | |
1392 }; | |
1393 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(data); ++i) { | |
1394 std::vector<std::wstring> results; | |
1395 SplitStringAlongWhitespace(data[i].input, &results); | |
1396 ASSERT_EQ(data[i].expected_result_count, results.size()); | |
1397 if (data[i].expected_result_count > 0) | |
1398 ASSERT_EQ(data[i].output1, results[0]); | |
1399 if (data[i].expected_result_count > 1) | |
1400 ASSERT_EQ(data[i].output2, results[1]); | |
1401 } | |
1402 } | |
1403 | |
1404 TEST(StringUtilTest, MatchPatternTest) { | |
1405 EXPECT_EQ(MatchPattern(L"www.google.com", L"*.com"), true); | |
1406 EXPECT_EQ(MatchPattern(L"www.google.com", L"*"), true); | |
1407 EXPECT_EQ(MatchPattern(L"www.google.com", L"www*.g*.org"), false); | |
1408 EXPECT_EQ(MatchPattern(L"Hello", L"H?l?o"), true); | |
1409 EXPECT_EQ(MatchPattern(L"www.google.com", L"http://*)"), false); | |
1410 EXPECT_EQ(MatchPattern(L"www.msn.com", L"*.COM"), false); | |
1411 EXPECT_EQ(MatchPattern(L"Hello*1234", L"He??o\\*1*"), true); | |
1412 EXPECT_EQ(MatchPattern(L"", L"*.*"), false); | |
1413 EXPECT_EQ(MatchPattern(L"", L"*"), true); | |
1414 EXPECT_EQ(MatchPattern(L"", L"?"), true); | |
1415 EXPECT_EQ(MatchPattern(L"", L""), true); | |
1416 EXPECT_EQ(MatchPattern(L"Hello", L""), false); | |
1417 EXPECT_EQ(MatchPattern(L"Hello*", L"Hello*"), true); | |
1418 EXPECT_EQ(MatchPattern("Hello*", "Hello*"), true); // narrow string | |
1419 } | |
1420 | |
1421 TEST(StringUtilTest, LcpyTest) { | |
1422 // Test the normal case where we fit in our buffer. | |
1423 { | |
1424 char dst[10]; | |
1425 wchar_t wdst[10]; | |
1426 EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst))); | |
1427 EXPECT_EQ(0, memcmp(dst, "abcdefg", 8)); | |
1428 EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst))); | |
1429 EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8)); | |
1430 } | |
1431 | |
1432 // Test dst_size == 0, nothing should be written to |dst| and we should | |
1433 // have the equivalent of strlen(src). | |
1434 { | |
1435 char dst[2] = {1, 2}; | |
1436 wchar_t wdst[2] = {1, 2}; | |
1437 EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", 0)); | |
1438 EXPECT_EQ(1, dst[0]); | |
1439 EXPECT_EQ(2, dst[1]); | |
1440 EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", 0)); | |
1441 #if defined(WCHAR_T_IS_UNSIGNED) | |
1442 EXPECT_EQ(1U, wdst[0]); | |
1443 EXPECT_EQ(2U, wdst[1]); | |
1444 #else | |
1445 EXPECT_EQ(1, wdst[0]); | |
1446 EXPECT_EQ(2, wdst[1]); | |
1447 #endif | |
1448 } | |
1449 | |
1450 // Test the case were we _just_ competely fit including the null. | |
1451 { | |
1452 char dst[8]; | |
1453 wchar_t wdst[8]; | |
1454 EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst))); | |
1455 EXPECT_EQ(0, memcmp(dst, "abcdefg", 8)); | |
1456 EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst))); | |
1457 EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8)); | |
1458 } | |
1459 | |
1460 // Test the case were we we are one smaller, so we can't fit the null. | |
1461 { | |
1462 char dst[7]; | |
1463 wchar_t wdst[7]; | |
1464 EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst))); | |
1465 EXPECT_EQ(0, memcmp(dst, "abcdef", 7)); | |
1466 EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst))); | |
1467 EXPECT_EQ(0, memcmp(wdst, L"abcdef", sizeof(wchar_t) * 7)); | |
1468 } | |
1469 | |
1470 // Test the case where we are just too small. | |
1471 { | |
1472 char dst[3]; | |
1473 wchar_t wdst[3]; | |
1474 EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst))); | |
1475 EXPECT_EQ(0, memcmp(dst, "ab", 3)); | |
1476 EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst))); | |
1477 EXPECT_EQ(0, memcmp(wdst, L"ab", sizeof(wchar_t) * 3)); | |
1478 } | |
1479 } | |
1480 | |
1481 TEST(StringUtilTest, WprintfFormatPortabilityTest) { | |
1482 struct TestData { | |
1483 const wchar_t* input; | |
1484 bool portable; | |
1485 } cases[] = { | |
1486 { L"%ls", true }, | |
1487 { L"%s", false }, | |
1488 { L"%S", false }, | |
1489 { L"%lS", false }, | |
1490 { L"Hello, %s", false }, | |
1491 { L"%lc", true }, | |
1492 { L"%c", false }, | |
1493 { L"%C", false }, | |
1494 { L"%lC", false }, | |
1495 { L"%ls %s", false }, | |
1496 { L"%s %ls", false }, | |
1497 { L"%s %ls %s", false }, | |
1498 { L"%f", true }, | |
1499 { L"%f %F", false }, | |
1500 { L"%d %D", false }, | |
1501 { L"%o %O", false }, | |
1502 { L"%u %U", false }, | |
1503 { L"%f %d %o %u", true }, | |
1504 { L"%-8d (%02.1f%)", true }, | |
1505 { L"% 10s", false }, | |
1506 { L"% 10ls", true } | |
1507 }; | |
1508 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) { | |
1509 EXPECT_EQ(cases[i].portable, base::IsWprintfFormatPortable(cases[i].input)); | |
1510 } | |
1511 } | |
1512 | |
1513 TEST(StringUtilTest, ElideString) { | |
1514 struct TestData { | |
1515 const wchar_t* input; | |
1516 int max_len; | |
1517 bool result; | |
1518 const wchar_t* output; | |
1519 } cases[] = { | |
1520 { L"Hello", 0, true, L"" }, | |
1521 { L"", 0, false, L"" }, | |
1522 { L"Hello, my name is Tom", 1, true, L"H" }, | |
1523 { L"Hello, my name is Tom", 2, true, L"He" }, | |
1524 { L"Hello, my name is Tom", 3, true, L"H.m" }, | |
1525 { L"Hello, my name is Tom", 4, true, L"H..m" }, | |
1526 { L"Hello, my name is Tom", 5, true, L"H...m" }, | |
1527 { L"Hello, my name is Tom", 6, true, L"He...m" }, | |
1528 { L"Hello, my name is Tom", 7, true, L"He...om" }, | |
1529 { L"Hello, my name is Tom", 10, true, L"Hell...Tom" }, | |
1530 { L"Hello, my name is Tom", 100, false, L"Hello, my name is Tom" } | |
1531 }; | |
1532 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) { | |
1533 std::wstring output; | |
1534 EXPECT_EQ(cases[i].result, | |
1535 ElideString(cases[i].input, cases[i].max_len, &output)); | |
1536 EXPECT_TRUE(output == cases[i].output); | |
1537 } | |
1538 } | |
1539 | |
1540 TEST(StringUtilTest, HexEncode) { | |
1541 std::string hex(HexEncode(NULL, 0)); | |
1542 EXPECT_EQ(hex.length(), 0U); | |
1543 unsigned char bytes[] = {0x01, 0xff, 0x02, 0xfe, 0x03, 0x80, 0x81}; | |
1544 hex = HexEncode(bytes, sizeof(bytes)); | |
1545 EXPECT_EQ(hex.compare("01FF02FE038081"), 0); | |
1546 } | 304 } |
1547 | 305 |
1548 } // namespace base | 306 } // namespace base |
OLD | NEW |