OLD | NEW |
1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "net/base/net_util.h" | 5 #include "net/base/net_util.h" |
6 | 6 |
7 #include <string.h> | 7 #include <stdint.h> |
8 | 8 |
9 #include <vector> | 9 #include <string> |
10 | 10 |
11 #include "base/format_macros.h" | |
12 #include "base/strings/string_number_conversions.h" | |
13 #include "base/strings/stringprintf.h" | |
14 #include "base/strings/utf_string_conversions.h" | 11 #include "base/strings/utf_string_conversions.h" |
15 #include "base/time/time.h" | 12 #include "base/time/time.h" |
16 #include "testing/gtest/include/gtest/gtest.h" | 13 #include "testing/gtest/include/gtest/gtest.h" |
17 #include "url/gurl.h" | 14 #include "url/gurl.h" |
18 | 15 |
19 using base::ASCIIToUTF16; | |
20 using base::WideToUTF16; | |
21 | |
22 namespace net { | 16 namespace net { |
23 | 17 |
24 namespace { | 18 namespace { |
25 | 19 |
26 const size_t kNpos = base::string16::npos; | |
27 | |
// Language lists handed to IDNToUnicode() as its second argument by the | |
// IDNToUnicodeFast / IDNToUnicodeSlow tests. IDNTestCase::unicode_allowed is | |
// indexed in parallel with this array, so the order matters: indices 3 | |
// ("ja"), 17 ("zh-TW,en") and 18 ("ko,ja") are the ones IDNToUnicodeSlow | |
// runs; every other index is run by IDNToUnicodeFast. | |
28 const char* const kLanguages[] = { | |
29 "", "en", "zh-CN", "ja", "ko", | |
30 "he", "ar", "ru", "el", "fr", | |
31 "de", "pt", "sv", "th", "hi", | |
32 "de,en", "el,en", "zh-TW,en", "ko,ja", "he,ru,en", | |
33 "zh,ru,en" | |
34 }; | |
35 | |
// One row of the IDN conversion table used by the IDNToUnicode* tests. | |
36 struct IDNTestCase { | |
// Host name handed to IDNToUnicode(). | |
37 const char* const input; | |
// Expected result when the matching unicode_allowed flag is true; when the | |
// flag is false the tests expect |input| to come back unchanged. | |
38 const wchar_t* unicode_output; | |
// One flag per kLanguages entry, in the same order. | |
39 const bool unicode_allowed[arraysize(kLanguages)]; | |
40 }; | |
41 | |
// Table driving IDNToUnicodeFast and IDNToUnicodeSlow. Each entry gives the | |
// input host, its expected Unicode form, and one flag per kLanguages entry | |
// saying whether that language list is expected to yield the Unicode form. | |
// Entries that spell out only 20 flags leave the final flag (the one for | |
// "zh,ru,en") to be value-initialized, i.e. false. | |
42 // TODO(jungshik) This is just a random sample of languages and is far | |
43 // from exhaustive. We may have to generate all the combinations | |
44 // of languages (powerset of a set of all the languages). | |
45 const IDNTestCase idn_cases[] = { | |
46 // No IDN | |
47 {"www.google.com", L"www.google.com", | |
48 {true, true, true, true, true, | |
49 true, true, true, true, true, | |
50 true, true, true, true, true, | |
51 true, true, true, true, true, | |
52 true}}, | |
53 {"www.google.com.", L"www.google.com.", | |
54 {true, true, true, true, true, | |
55 true, true, true, true, true, | |
56 true, true, true, true, true, | |
57 true, true, true, true, true, | |
58 true}}, | |
59 {".", L".", | |
60 {true, true, true, true, true, | |
61 true, true, true, true, true, | |
62 true, true, true, true, true, | |
63 true, true, true, true, true, | |
64 true}}, | |
65 {"", L"", | |
66 {true, true, true, true, true, | |
67 true, true, true, true, true, | |
68 true, true, true, true, true, | |
69 true, true, true, true, true, | |
70 true}}, | |
71 // IDN | |
72 // Hanzi (Traditional Chinese) | |
73 {"xn--1lq90ic7f1rc.cn", L"\x5317\x4eac\x5927\x5b78.cn", | |
74 {true, false, true, true, false, | |
75 false, false, false, false, false, | |
76 false, false, false, false, false, | |
77 false, false, true, true, false, | |
78 true}}, | |
79 // Hanzi ('video' in Simplified Chinese : will pass only in zh-CN,zh) | |
80 {"xn--cy2a840a.com", L"\x89c6\x9891.com", | |
81 {true, false, true, false, false, | |
82 false, false, false, false, false, | |
83 false, false, false, false, false, | |
84 false, false, false, false, false, | |
85 true}}, | |
86 // Hanzi + '123' | |
87 {"www.xn--123-p18d.com", L"www.\x4e00" L"123.com", | |
88 {true, false, true, true, false, | |
89 false, false, false, false, false, | |
90 false, false, false, false, false, | |
91 false, false, true, true, false, | |
92 true}}, | |
93 // Hanzi + Latin : U+56FD is simplified and is regarded | |
94 // as not supported in zh-TW. | |
95 {"www.xn--hello-9n1hm04c.com", L"www.hello\x4e2d\x56fd.com", | |
96 {false, false, true, true, false, | |
97 false, false, false, false, false, | |
98 false, false, false, false, false, | |
99 false, false, false, true, false, | |
100 true}}, | |
101 // Kanji + Kana (Japanese) | |
102 {"xn--l8jvb1ey91xtjb.jp", L"\x671d\x65e5\x3042\x3055\x3072.jp", | |
103 {true, false, false, true, false, | |
104 false, false, false, false, false, | |
105 false, false, false, false, false, | |
106 false, false, false, true, false, | |
107 false}}, | |
108 // Katakana including U+30FC | |
109 {"xn--tckm4i2e.jp", L"\x30b3\x30de\x30fc\x30b9.jp", | |
110 {true, false, false, true, false, | |
111 false, false, false, false, false, | |
112 false, false, false, false, false, | |
113 false, false, false, true, false, | |
114 }}, | |
115 {"xn--3ck7a7g.jp", L"\u30ce\u30f3\u30bd.jp", | |
116 {true, false, false, true, false, | |
117 false, false, false, false, false, | |
118 false, false, false, false, false, | |
119 false, false, false, true, false, | |
120 }}, | |
121 // Katakana + Latin (Japanese) | |
122 // TODO(jungshik): Change 'false' in the first element to 'true' | |
123 // after upgrading to ICU 4.2.1 to use new uspoof_* APIs instead | |
124 // of our IsIDNComponentInSingleScript(). | |
125 {"xn--e-efusa1mzf.jp", L"e\x30b3\x30de\x30fc\x30b9.jp", | |
126 {false, false, false, true, false, | |
127 false, false, false, false, false, | |
128 false, false, false, false, false, | |
129 false, false, false, true, false, | |
130 }}, | |
131 {"xn--3bkxe.jp", L"\x30c8\x309a.jp", | |
132 {false, false, false, true, false, | |
133 false, false, false, false, false, | |
134 false, false, false, false, false, | |
135 false, false, false, true, false, | |
136 }}, | |
137 // Hangul (Korean) | |
138 {"www.xn--or3b17p6jjc.kr", L"www.\xc804\xc790\xc815\xbd80.kr", | |
139 {true, false, false, false, true, | |
140 false, false, false, false, false, | |
141 false, false, false, false, false, | |
142 false, false, false, true, false, | |
143 false}}, | |
144 // b<u-umlaut>cher (German) | |
145 {"xn--bcher-kva.de", L"b\x00fc" L"cher.de", | |
146 {true, false, false, false, false, | |
147 false, false, false, false, true, | |
148 true, false, false, false, false, | |
149 true, false, false, false, false, | |
150 false}}, | |
151 // a with diaeresis | |
152 {"www.xn--frgbolaget-q5a.se", L"www.f\x00e4rgbolaget.se", | |
153 {true, false, false, false, false, | |
154 false, false, false, false, false, | |
155 true, false, true, false, false, | |
156 true, false, false, false, false, | |
157 false}}, | |
158 // c-cedilla (French) | |
159 {"www.xn--alliancefranaise-npb.fr", L"www.alliancefran\x00e7" L"aise.fr", | |
160 {true, false, false, false, false, | |
161 false, false, false, false, true, | |
162 false, true, false, false, false, | |
163 false, false, false, false, false, | |
164 false}}, | |
165 // cafe with an acute accent on the 'e' (French) | |
166 {"xn--caf-dma.fr", L"caf\x00e9.fr", | |
167 {true, false, false, false, false, | |
168 false, false, false, false, true, | |
169 false, true, true, false, false, | |
170 false, false, false, false, false, | |
171 false}}, | |
172 // c-cedilla and a with tilde (Portuguese) | |
173 {"xn--poema-9qae5a.com.br", L"p\x00e3oema\x00e7\x00e3.com.br", | |
174 {true, false, false, false, false, | |
175 false, false, false, false, false, | |
176 false, true, false, false, false, | |
177 false, false, false, false, false, | |
178 false}}, | |
179 // s with caron | |
180 {"xn--achy-f6a.com", L"\x0161" L"achy.com", | |
181 {true, false, false, false, false, | |
182 false, false, false, false, false, | |
183 false, false, false, false, false, | |
184 false, false, false, false, false, | |
185 false}}, | |
186 // TODO(jungshik) : Add examples with Cyrillic letters | |
187 // only used in some languages written in Cyrillic. | |
188 // Eutopia (Greek) | |
189 {"xn--kxae4bafwg.gr", L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1.gr", | |
190 {true, false, false, false, false, | |
191 false, false, false, true, false, | |
192 false, false, false, false, false, | |
193 false, true, false, false, false, | |
194 false}}, | |
195 // Eutopia + 123 (Greek) | |
196 {"xn---123-pldm0haj2bk.gr", | |
197 L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1-123.gr", | |
198 {true, false, false, false, false, | |
199 false, false, false, true, false, | |
200 false, false, false, false, false, | |
201 false, true, false, false, false, | |
202 false}}, | |
203 // Cyrillic (Russian) | |
204 {"xn--n1aeec9b.ru", L"\x0442\x043e\x0440\x0442\x044b.ru", | |
205 {true, false, false, false, false, | |
206 false, false, true, false, false, | |
207 false, false, false, false, false, | |
208 false, false, false, false, true, | |
209 true}}, | |
210 // Cyrillic + 123 (Russian) | |
211 {"xn---123-45dmmc5f.ru", L"\x0442\x043e\x0440\x0442\x044b-123.ru", | |
212 {true, false, false, false, false, | |
213 false, false, true, false, false, | |
214 false, false, false, false, false, | |
215 false, false, false, false, true, | |
216 true}}, | |
217 // Arabic | |
218 {"xn--mgba1fmg.ar", L"\x0627\x0641\x0644\x0627\x0645.ar", | |
219 {true, false, false, false, false, | |
220 false, true, false, false, false, | |
221 false, false, false, false, false, | |
222 false, false, false, false, false, | |
223 false}}, | |
224 // Hebrew | |
225 {"xn--4dbib.he", L"\x05d5\x05d0\x05d4.he", | |
226 {true, false, false, false, false, | |
227 true, false, false, false, false, | |
228 false, false, false, false, false, | |
229 false, false, false, false, true, | |
230 false}}, | |
231 // Thai | |
232 {"xn--12c2cc4ag3b4ccu.th", | |
233 L"\x0e2a\x0e32\x0e22\x0e01\x0e32\x0e23\x0e1a\x0e34\x0e19.th", | |
234 {true, false, false, false, false, | |
235 false, false, false, false, false, | |
236 false, false, false, true, false, | |
237 false, false, false, false, false, | |
238 false}}, | |
239 // Devanagari (Hindi) | |
240 {"www.xn--l1b6a9e1b7c.in", L"www.\x0905\x0915\x094b\x0932\x093e.in", | |
241 {true, false, false, false, false, | |
242 false, false, false, false, false, | |
243 false, false, false, false, true, | |
244 false, false, false, false, false, | |
245 false}}, | |
246 // Invalid IDN | |
247 {"xn--hello?world.com", NULL, | |
248 {false, false, false, false, false, | |
249 false, false, false, false, false, | |
250 false, false, false, false, false, | |
251 false, false, false, false, false, | |
252 false}}, | |
253 // Unsafe IDNs | |
254 // "payp<alpha>l.com" | |
255 {"www.xn--paypl-g9d.com", L"payp\x03b1l.com", | |
256 {false, false, false, false, false, | |
257 false, false, false, false, false, | |
258 false, false, false, false, false, | |
259 false, false, false, false, false, | |
260 false}}, | |
261 // google.gr with Greek omicron and epsilon | |
262 {"xn--ggl-6xc1ca.gr", L"g\x03bf\x03bfgl\x03b5.gr", | |
263 {false, false, false, false, false, | |
264 false, false, false, false, false, | |
265 false, false, false, false, false, | |
266 false, false, false, false, false, | |
267 false}}, | |
268 // google.ru with Cyrillic o | |
269 {"xn--ggl-tdd6ba.ru", L"g\x043e\x043egl\x0435.ru", | |
270 {false, false, false, false, false, | |
271 false, false, false, false, false, | |
272 false, false, false, false, false, | |
273 false, false, false, false, false, | |
274 false}}, | |
275 // h<e with acute>llo<China in Han>.cn | |
276 {"xn--hllo-bpa7979ih5m.cn", L"h\x00e9llo\x4e2d\x56fd.cn", | |
277 {false, false, false, false, false, | |
278 false, false, false, false, false, | |
279 false, false, false, false, false, | |
280 false, false, false, false, false, | |
281 false}}, | |
282 // <Greek rho><Cyrillic a><Cyrillic u>.ru | |
283 {"xn--2xa6t2b.ru", L"\x03c1\x0430\x0443.ru", | |
284 {false, false, false, false, false, | |
285 false, false, false, false, false, | |
286 false, false, false, false, false, | |
287 false, false, false, false, false, | |
288 false}}, | |
289 // One that's really long that will force a buffer realloc | |
290 {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" | |
291 "aaaaaaa", | |
292 L"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" | |
293 L"aaaaaaaa", | |
294 {true, true, true, true, true, | |
295 true, true, true, true, true, | |
296 true, true, true, true, true, | |
297 true, true, true, true, true, | |
298 true}}, | |
299 // Test cases for characters we blacklisted although allowed in IDN. | |
300 // Embedded spaces will be turned to %20 in the display. | |
301 // TODO(jungshik): We need to have more cases. This is a typical | |
302 // data-driven trap. The following test cases need to be separated | |
303 // and tested only for a couple of languages. | |
304 {"xn--osd3820f24c.kr", L"\xac00\xb098\x115f.kr", | |
305 {false, false, false, false, false, | |
306 false, false, false, false, false, | |
307 false, false, false, false, false, | |
308 false, false, false, false, false, | |
309 false}}, | |
310 {"www.xn--google-ho0coa.com", L"www.\x2039google\x203a.com", | |
311 {false, false, false, false, false, | |
312 false, false, false, false, false, | |
313 false, false, false, false, false, | |
314 false, false, false, false, false, | |
315 }}, | |
316 {"google.xn--comabc-k8d", L"google.com\x0338" L"abc", | |
317 {false, false, false, false, false, | |
318 false, false, false, false, false, | |
319 false, false, false, false, false, | |
320 false, false, false, false, false, | |
321 }}, | |
322 {"google.xn--com-oh4ba.evil.jp", L"google.com\x309a\x309a.evil.jp", | |
323 {false, false, false, false, false, | |
324 false, false, false, false, false, | |
325 false, false, false, false, false, | |
326 false, false, false, false, false, | |
327 }}, | |
328 {"google.xn--comevil-v04f.jp", L"google.com\x30ce" L"evil.jp", | |
329 {false, false, false, false, false, | |
330 false, false, false, false, false, | |
331 false, false, false, false, false, | |
332 false, false, false, false, false, | |
333 }}, | |
334 // Padlock icon spoof. | |
335 {"xn--google-hj64e", L"\U0001f512google.com", | |
336 {false, false, false, false, false, | |
337 false, false, false, false, false, | |
338 false, false, false, false, false, | |
339 false, false, false, false, false, | |
340 }}, | |
341 // Ensure that blacklisting "\xd83d\xdd12" did not inadvertently blacklist | |
342 // all strings with the surrogate '\xdd12'. | |
343 {"xn--fk9c.com", L"\U00010912.com", | |
344 {true, false, false, false, false, | |
345 false, false, false, false, false, | |
346 false, false, false, false, false, | |
347 false, false, false, false, false, | |
348 }}, | |
349 #if 0 | |
350 // These two cases are special. We need a separate test. | |
351 // U+3000 and U+3002 are normalized to ASCII space and dot. | |
352 {"xn-- -kq6ay5z.cn", L"\x4e2d\x56fd\x3000.cn", | |
353 {false, false, true, false, false, | |
354 false, false, false, false, false, | |
355 false, false, false, false, false, | |
356 false, false, true, false, false, | |
357 true}}, | |
358 {"xn--fiqs8s.cn", L"\x4e2d\x56fd\x3002" L"cn", | |
359 {false, false, true, false, false, | |
360 false, false, false, false, false, | |
361 false, false, false, false, false, | |
362 false, false, true, false, false, | |
363 true}}, | |
364 #endif | |
365 }; | |
366 | |
// Pairs an offset with a second offset; presumably an offset into an input | |
// URL and the offset expected after formatting. | |
// NOTE(review): nothing in the visible part of this file refers to this | |
// struct -- confirm it is still needed. | |
367 struct AdjustOffsetCase { | |
368 size_t input_offset; | |
369 size_t output_offset; | |
370 }; | |
371 | |
// One row of the table used by the FormatUrl test. | |
372 struct UrlTestData { | |
// Label streamed into the failure message of each expectation. | |
373 const char* const description; | |
// URL text; the test wraps it in a GURL before calling FormatUrl(). | |
374 const char* const input; | |
// The next three fields are forwarded to FormatUrl() unchanged. | |
375 const char* const languages; | |
376 FormatUrlTypes format_types; | |
377 UnescapeRule::Type escape_rules; | |
378 const wchar_t* output; // Use |wchar_t| to handle Unicode constants easily. | |
// Expected value of the prefix length reported by FormatUrl(). | |
379 size_t prefix_len; | |
380 }; | |
381 | |
382 // A helper for IDN*{Fast,Slow}. | |
383 // Append "::<language list>" to |expected| and |actual| to make it | |
384 // easy to tell which sub-case fails without debugging. | |
// The same suffix is appended to both strings, so whether they compare | |
// equal is unaffected. | |
385 void AppendLanguagesToOutputs(const char* languages, | |
386 base::string16* expected, | |
387 base::string16* actual) { | |
388 base::string16 to_append = ASCIIToUTF16("::") + ASCIIToUTF16(languages); | |
389 expected->append(to_append); | |
390 actual->append(to_append); | |
391 } | |
392 | |
393 // A pair of helpers for the FormatUrlWithOffsets() test. | |
// VerboseExpect() wraps EXPECT_EQ(expected, actual) and, on failure, prints | |
// the original URL, the character position being checked and the formatted | |
// URL. | |
394 void VerboseExpect(size_t expected, | |
395 size_t actual, | |
396 const std::string& original_url, | |
397 size_t position, | |
398 const base::string16& formatted_url) { | |
399 EXPECT_EQ(expected, actual) << "Original URL: " << original_url | |
400 << " (at char " << position << ")\nFormatted URL: " << formatted_url; | |
401 } | |
402 | |
// Formats |url_string| with FormatUrlWithOffsets() and checks how the input | |
// offsets were adjusted. | |
// |output_offsets| is read at indices [0, url_length) and holds the expected | |
// adjusted offset for each of those input offsets. | |
403 void CheckAdjustedOffsets(const std::string& url_string, | |
404 const std::string& languages, | |
405 FormatUrlTypes format_types, | |
406 UnescapeRule::Type unescape_rules, | |
407 const size_t* output_offsets) { | |
408 GURL url(url_string); | |
409 size_t url_length = url_string.length(); | |
// The offsets vector holds 0 .. url_length + 1 (url_length + 2 entries), | |
// followed by 500000 and npos. | |
410 std::vector<size_t> offsets; | |
411 for (size_t i = 0; i <= url_length + 1; ++i) | |
412 offsets.push_back(i); | |
413 offsets.push_back(500000); // Something larger than any input length. | |
414 offsets.push_back(std::string::npos); | |
415 base::string16 formatted_url = FormatUrlWithOffsets(url, languages, | |
416 format_types, unescape_rules, NULL, NULL, &offsets); | |
417 for (size_t i = 0; i < url_length; ++i) | |
418 VerboseExpect(output_offsets[i], offsets[i], url_string, i, formatted_url); | |
// The end-of-input offset is expected to map to the end of the output. | |
419 VerboseExpect(formatted_url.length(), offsets[url_length], url_string, | |
420 url_length, formatted_url); | |
// NOTE(review): given the pushes above, index url_length + 1 holds the | |
// value url_length + 1, index url_length + 2 holds 500000 and index | |
// url_length + 3 holds npos. The two checks below therefore report a | |
// position label that is one entry ahead of the entry they read, and the | |
// npos entry itself is never checked -- confirm whether this is intended. | |
421 VerboseExpect(base::string16::npos, offsets[url_length + 1], url_string, | |
422 500000, formatted_url); | |
423 VerboseExpect(base::string16::npos, offsets[url_length + 2], url_string, | |
424 std::string::npos, formatted_url); | |
425 } | |
426 | |
427 } // anonymous namespace | |
428 | |
// Runs IDNToUnicode() on every idn_cases entry with every kLanguages entry | |
// except indices 3, 17 and 18, which are left to IDNToUnicodeSlow. The | |
// expected string is unicode_output when the matching flag is set and the | |
// untouched input otherwise. | |
429 TEST(NetUtilTest, IDNToUnicodeFast) { | |
430 for (size_t i = 0; i < arraysize(idn_cases); i++) { | |
431 for (size_t j = 0; j < arraysize(kLanguages); j++) { | |
432 // ja || zh-TW,en || ko,ja -> IDNToUnicodeSlow | |
433 if (j == 3 || j == 17 || j == 18) | |
434 continue; | |
435 base::string16 output(IDNToUnicode(idn_cases[i].input, kLanguages[j])); | |
436 base::string16 expected(idn_cases[i].unicode_allowed[j] ? | |
437 WideToUTF16(idn_cases[i].unicode_output) : | |
438 ASCIIToUTF16(idn_cases[i].input)); | |
// Tag both strings with the language list so a failure names the sub-case. | |
439 AppendLanguagesToOutputs(kLanguages[j], &expected, &output); | |
440 EXPECT_EQ(expected, output) << "input: \"" << idn_cases[i].input | |
441 << "\", languages: \"" << kLanguages[j] | |
442 << "\""; | |
443 } | |
444 } | |
445 } | |
446 | |
// Counterpart of IDNToUnicodeFast: runs the same check, but only for the | |
// kLanguages indices 3, 17 and 18 that the other test skips. | |
447 TEST(NetUtilTest, IDNToUnicodeSlow) { | |
448 for (size_t i = 0; i < arraysize(idn_cases); i++) { | |
449 for (size_t j = 0; j < arraysize(kLanguages); j++) { | |
450 // !(ja || zh-TW,en || ko,ja) -> IDNToUnicodeFast | |
451 if (!(j == 3 || j == 17 || j == 18)) | |
452 continue; | |
453 base::string16 output(IDNToUnicode(idn_cases[i].input, kLanguages[j])); | |
454 base::string16 expected(idn_cases[i].unicode_allowed[j] ? | |
455 WideToUTF16(idn_cases[i].unicode_output) : | |
456 ASCIIToUTF16(idn_cases[i].input)); | |
// Tag both strings with the language list so a failure names the sub-case. | |
457 AppendLanguagesToOutputs(kLanguages[j], &expected, &output); | |
458 EXPECT_EQ(expected, output) << "input: \"" << idn_cases[i].input | |
459 << "\", languages: \"" << kLanguages[j] | |
460 << "\""; | |
461 } | |
462 } | |
463 } | |
464 | |
465 // ulocdata_getExemplarSet may fail with some locales (currently bn, gu, and | |
466 // te), which was causing a crash (See http://crbug.com/510551). This may be an | |
467 // icu bug, but regardless, that should not cause a crash. | |
// The test calls IDNToUnicode() with every two-letter lowercase code from | |
// "aa" to "zz" and makes no assertion on the result; it passes as long as | |
// every call returns. | |
468 TEST(NetUtilTest, IDNToUnicodeNeverCrashes) { | |
469 for (char c1 = 'a'; c1 <= 'z'; c1++) { | |
470 for (char c2 = 'a'; c2 <= 'z'; c2++) { | |
471 std::string lang = base::StringPrintf("%c%c", c1, c2); | |
472 base::string16 output(IDNToUnicode("xn--74h", lang)); | |
473 } | |
474 } | |
475 } | |
476 | |
// Checks StripWWW() on an empty string, on a bare "www." prefix, on a host | |
// with the prefix and on a host without it. | |
477 TEST(NetUtilTest, StripWWW) { | |
478 EXPECT_EQ(base::string16(), StripWWW(base::string16())); | |
479 EXPECT_EQ(base::string16(), StripWWW(ASCIIToUTF16("www."))); | |
480 EXPECT_EQ(ASCIIToUTF16("blah"), StripWWW(ASCIIToUTF16("www.blah"))); | |
481 EXPECT_EQ(ASCIIToUTF16("blah"), StripWWW(ASCIIToUTF16("blah"))); | |
482 } | |
483 | |
484 // This is currently a Windows-specific function. | |
485 #if defined(OS_WIN) | |
486 namespace { | |
487 | |
// One row of the GetDirectoryListingEntry test table. | // One row of the GetDirectoryListingEntry test table. |
// The first five fields are the arguments passed to | // The first five fields are the arguments passed to |
// GetDirectoryListingEntry() (|name| after conversion to UTF-16); | // GetDirectoryListingEntry() (|name| after conversion to UTF-16); |
// |expected| is the string its result is compared against. | // |expected| is the string its result is compared against. |
488 struct GetDirectoryListingEntryCase { | 20 struct GetDirectoryListingEntryCase { |
489 const wchar_t* name; | 21 const wchar_t* name; |
490 const char* const raw_bytes; | 22 const char* const raw_bytes; |
491 bool is_dir; | 23 bool is_dir; |
492 int64_t filesize; | 24 int64_t filesize; |
493 base::Time time; | 25 base::Time time; |
494 const char* const expected; | 26 const char* const expected; |
495 }; | 27 }; |
496 | 28 |
497 } // namespace | |
498 | |
499 TEST(NetUtilTest, GetDirectoryListingEntry) { | 29 TEST(NetUtilTest, GetDirectoryListingEntry) { |
500 const GetDirectoryListingEntryCase test_cases[] = { | 30 const GetDirectoryListingEntryCase test_cases[] = { |
501 {L"Foo", | 31 {L"Foo", |
502 "", | 32 "", |
503 false, | 33 false, |
504 10000, | 34 10000, |
505 base::Time(), | 35 base::Time(), |
506 "<script>addRow(\"Foo\",\"Foo\",0,\"9.8 kB\",\"\");</script>\n"}, | 36 "<script>addRow(\"Foo\",\"Foo\",0,\"9.8 kB\",\"\");</script>\n"}, |
507 {L"quo\"tes", | 37 {L"quo\"tes", |
508 "", | 38 "", |
(...skipping 24 matching lines...) Expand all Loading... |
533 "\xC7\xD1\xB1\xDB.txt", | 63 "\xC7\xD1\xB1\xDB.txt", |
534 false, | 64 false, |
535 10000, | 65 10000, |
536 base::Time(), | 66 base::Time(), |
537 "<script>addRow(\"\xED\x95\x9C\xEA\xB8\x80.txt\",\"%C7%D1%B1%DB.txt\"" | 67 "<script>addRow(\"\xED\x95\x9C\xEA\xB8\x80.txt\",\"%C7%D1%B1%DB.txt\"" |
538 ",0,\"9.8 kB\",\"\");</script>\n"}, | 68 ",0,\"9.8 kB\",\"\");</script>\n"}, |
539 }; | 69 }; |
540 | 70 |
541 for (size_t i = 0; i < arraysize(test_cases); ++i) { | 71 for (size_t i = 0; i < arraysize(test_cases); ++i) { |
542 const std::string results = GetDirectoryListingEntry( | 72 const std::string results = GetDirectoryListingEntry( |
543 WideToUTF16(test_cases[i].name), | 73 base::WideToUTF16(test_cases[i].name), test_cases[i].raw_bytes, |
544 test_cases[i].raw_bytes, | 74 test_cases[i].is_dir, test_cases[i].filesize, test_cases[i].time); |
545 test_cases[i].is_dir, | |
546 test_cases[i].filesize, | |
547 test_cases[i].time); | |
548 EXPECT_EQ(test_cases[i].expected, results); | 75 EXPECT_EQ(test_cases[i].expected, results); |
549 } | 76 } |
550 } | 77 } |
551 | 78 |
552 #endif | 79 } // namespace |
553 | |
// Table-driven test of FormatUrl(). Each UrlTestData row supplies the input | |
// URL, the language list, the format flags and the unescape rule; the loop | |
// at the end checks both the formatted string and the reported prefix | |
// length against the row's expectations. | |
554 TEST(NetUtilTest, FormatUrl) { | |
555 FormatUrlTypes default_format_type = kFormatUrlOmitUsernamePassword; | |
556 const UrlTestData tests[] = { | |
557 {"Empty URL", "", "", default_format_type, UnescapeRule::NORMAL, L"", 0}, | |
558 | |
559 {"Simple URL", | |
560 "http://www.google.com/", "", default_format_type, UnescapeRule::NORMAL, | |
561 L"http://www.google.com/", 7}, | |
562 | |
563 {"With a port number and a reference", | |
564 "http://www.google.com:8080/#\xE3\x82\xB0", "", default_format_type, | |
565 UnescapeRule::NORMAL, | |
566 L"http://www.google.com:8080/#\x30B0", 7}, | |
567 | |
568 // -------- IDN tests -------- | |
569 {"Japanese IDN with ja", | |
570 "http://xn--l8jvb1ey91xtjb.jp", "ja", default_format_type, | |
571 UnescapeRule::NORMAL, L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7}, | |
572 | |
573 {"Japanese IDN with en", | |
574 "http://xn--l8jvb1ey91xtjb.jp", "en", default_format_type, | |
575 UnescapeRule::NORMAL, L"http://xn--l8jvb1ey91xtjb.jp/", 7}, | |
576 | |
577 {"Japanese IDN without any languages", | |
578 "http://xn--l8jvb1ey91xtjb.jp", "", default_format_type, | |
579 UnescapeRule::NORMAL, | |
580 // Single script is safe for empty languages. | |
581 L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7}, | |
582 | |
583 {"mailto: with Japanese IDN", | |
584 "mailto:foo@xn--l8jvb1ey91xtjb.jp", "ja", default_format_type, | |
585 UnescapeRule::NORMAL, | |
586 // GURL doesn't assume an email address's domain part as a host name. | |
587 L"mailto:foo@xn--l8jvb1ey91xtjb.jp", 7}, | |
588 | |
589 {"file: with Japanese IDN", | |
590 "file://xn--l8jvb1ey91xtjb.jp/config.sys", "ja", default_format_type, | |
591 UnescapeRule::NORMAL, | |
592 L"file://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 7}, | |
593 | |
594 {"ftp: with Japanese IDN", | |
595 "ftp://xn--l8jvb1ey91xtjb.jp/config.sys", "ja", default_format_type, | |
596 UnescapeRule::NORMAL, | |
597 L"ftp://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 6}, | |
598 | |
599 // -------- omit_username_password flag tests -------- | |
600 {"With username and password, omit_username_password=false", | |
601 "http://user:passwd@example.com/foo", "", | |
602 kFormatUrlOmitNothing, UnescapeRule::NORMAL, | |
603 L"http://user:passwd@example.com/foo", 19}, | |
604 | |
605 {"With username and password, omit_username_password=true", | |
606 "http://user:passwd@example.com/foo", "", default_format_type, | |
607 UnescapeRule::NORMAL, L"http://example.com/foo", 7}, | |
608 | |
609 {"With username and no password", | |
610 "http://user@example.com/foo", "", default_format_type, | |
611 UnescapeRule::NORMAL, L"http://example.com/foo", 7}, | |
612 | |
613 {"Just '@' without username and password", | |
614 "http://@example.com/foo", "", default_format_type, UnescapeRule::NORMAL, | |
615 L"http://example.com/foo", 7}, | |
616 | |
617 // GURL doesn't think local-part of an email address is username for URL. | |
618 {"mailto:, omit_username_password=true", | |
619 "mailto:foo@example.com", "", default_format_type, UnescapeRule::NORMAL, | |
620 L"mailto:foo@example.com", 7}, | |
621 | |
622 // -------- unescape flag tests -------- | |
623 {"Do not unescape", | |
624 "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/" | |
625 "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB" | |
626 "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", "en", default_format_type, | |
627 UnescapeRule::NONE, | |
628 // GURL parses %-encoded hostnames into Punycode. | |
629 L"http://xn--qcka1pmc.jp/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB" | |
630 L"?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", 7}, | |
631 | |
632 {"Unescape normally", | |
633 "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/" | |
634 "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB" | |
635 "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", "en", default_format_type, | |
636 UnescapeRule::NORMAL, | |
637 L"http://xn--qcka1pmc.jp/\x30B0\x30FC\x30B0\x30EB" | |
638 L"?q=\x30B0\x30FC\x30B0\x30EB", 7}, | |
639 | |
640 {"Unescape normally with BiDi control character", | |
641 "http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy", "en", default_format_type, | |
642 UnescapeRule::NORMAL, L"http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy", 7}, | |
643 | |
644 {"Unescape normally including unescape spaces", | |
645 "http://www.google.com/search?q=Hello%20World", "en", default_format_type, | |
646 UnescapeRule::SPACES, L"http://www.google.com/search?q=Hello World", 7}, | |
647 | |
648 /* | |
649 {"unescape=true with some special characters", | |
650 "http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", "", | |
651 kFormatUrlOmitNothing, UnescapeRule::NORMAL, | |
652 L"http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", 25}, | |
653 */ | |
654 // Disabled: the resultant URL becomes "...user%253A:%2540passwd...". | |
655 | |
656 // -------- omit http: -------- | |
657 {"omit http with user name", | |
658 "http://user@example.com/foo", "", kFormatUrlOmitAll, | |
659 UnescapeRule::NORMAL, L"example.com/foo", 0}, | |
660 | |
661 {"omit http", | |
662 "http://www.google.com/", "en", kFormatUrlOmitHTTP, | |
663 UnescapeRule::NORMAL, L"www.google.com/", | |
664 0}, | |
665 | |
666 {"omit http with https", | |
667 "https://www.google.com/", "en", kFormatUrlOmitHTTP, | |
668 UnescapeRule::NORMAL, L"https://www.google.com/", | |
669 8}, | |
670 | |
671 {"omit http starts with ftp.", | |
672 "http://ftp.google.com/", "en", kFormatUrlOmitHTTP, | |
673 UnescapeRule::NORMAL, L"http://ftp.google.com/", | |
674 7}, | |
675 | |
676 // -------- omit trailing slash on bare hostname -------- | |
677 {"omit slash when it's the entire path", | |
678 "http://www.google.com/", "en", | |
679 kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL, | |
680 L"http://www.google.com", 7}, | |
681 {"omit slash when there's a ref", | |
682 "http://www.google.com/#ref", "en", | |
683 kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL, | |
684 L"http://www.google.com/#ref", 7}, | |
685 {"omit slash when there's a query", | |
686 "http://www.google.com/?", "en", | |
687 kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL, | |
688 L"http://www.google.com/?", 7}, | |
689 {"omit slash when it's not the entire path", | |
690 "http://www.google.com/foo", "en", | |
691 kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL, | |
692 L"http://www.google.com/foo", 7}, | |
693 {"omit slash for nonstandard URLs", | |
694 "data:/", "en", kFormatUrlOmitTrailingSlashOnBareHostname, | |
695 UnescapeRule::NORMAL, L"data:/", 5}, | |
696 {"omit slash for file URLs", | |
697 "file:///", "en", kFormatUrlOmitTrailingSlashOnBareHostname, | |
698 UnescapeRule::NORMAL, L"file:///", 7}, | |
699 | |
700 // -------- view-source: -------- | |
701 {"view-source", | |
702 "view-source:http://xn--qcka1pmc.jp/", "ja", default_format_type, | |
703 UnescapeRule::NORMAL, L"view-source:http://\x30B0\x30FC\x30B0\x30EB.jp/", | |
704 19}, | |
705 | |
706 {"view-source of view-source", | |
707 "view-source:view-source:http://xn--qcka1pmc.jp/", "ja", | |
708 default_format_type, UnescapeRule::NORMAL, | |
709 L"view-source:view-source:http://xn--qcka1pmc.jp/", 12}, | |
710 | |
711 // view-source should omit http and trailing slash where non-view-source | |
712 // would. | |
713 {"view-source omit http", | |
714 "view-source:http://a.b/c", "en", kFormatUrlOmitAll, | |
715 UnescapeRule::NORMAL, L"view-source:a.b/c", | |
716 12}, | |
717 {"view-source omit http starts with ftp.", | |
718 "view-source:http://ftp.b/c", "en", kFormatUrlOmitAll, | |
719 UnescapeRule::NORMAL, L"view-source:http://ftp.b/c", | |
720 19}, | |
721 {"view-source omit slash when it's the entire path", | |
722 "view-source:http://a.b/", "en", kFormatUrlOmitAll, | |
723 UnescapeRule::NORMAL, L"view-source:a.b", | |
724 12}, | |
725 }; | |
726 | |
// Only the prefix-length out-parameter is requested; the other two optional | |
// outputs are passed as NULL. | |
727 for (size_t i = 0; i < arraysize(tests); ++i) { | |
728 size_t prefix_len; | |
729 base::string16 formatted = FormatUrl( | |
730 GURL(tests[i].input), tests[i].languages, tests[i].format_types, | |
731 tests[i].escape_rules, NULL, &prefix_len, NULL); | |
732 EXPECT_EQ(WideToUTF16(tests[i].output), formatted) << tests[i].description; | |
733 EXPECT_EQ(tests[i].prefix_len, prefix_len) << tests[i].description; | |
734 } | |
735 } | |
736 | |
// Checks the url::Parsed that FormatUrl() fills in through its fifth argument: | |
// after each call, every component's begin/len range is cut out of the | |
// formatted string with substr() and compared with the expected text, and | |
// components that are absent from the output must report !is_valid(). | |
737 TEST(NetUtilTest, FormatUrlParsed) { | |
738 // No unescape case. | |
// With UnescapeRule::NONE the expected string keeps every %XX escape, while | |
// the xn-- (punycode) host is expected in its Unicode form. | |
739 url::Parsed parsed; | |
740 base::string16 formatted = FormatUrl( | |
741 GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/" | |
742 "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"), | |
743 "ja", kFormatUrlOmitNothing, UnescapeRule::NONE, &parsed, NULL, | |
744 NULL); | |
745 EXPECT_EQ(WideToUTF16( | |
746 L"http://%E3%82%B0:%E3%83%BC@\x30B0\x30FC\x30B0\x30EB.jp:8080" | |
747 L"/%E3%82%B0/?q=%E3%82%B0#\x30B0"), formatted); | |
748 EXPECT_EQ(WideToUTF16(L"%E3%82%B0"), | |
749 formatted.substr(parsed.username.begin, parsed.username.len)); | |
750 EXPECT_EQ(WideToUTF16(L"%E3%83%BC"), | |
751 formatted.substr(parsed.password.begin, parsed.password.len)); | |
752 EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"), | |
753 formatted.substr(parsed.host.begin, parsed.host.len)); | |
754 EXPECT_EQ(WideToUTF16(L"8080"), | |
755 formatted.substr(parsed.port.begin, parsed.port.len)); | |
756 EXPECT_EQ(WideToUTF16(L"/%E3%82%B0/"), | |
757 formatted.substr(parsed.path.begin, parsed.path.len)); | |
758 EXPECT_EQ(WideToUTF16(L"q=%E3%82%B0"), | |
759 formatted.substr(parsed.query.begin, parsed.query.len)); | |
760 EXPECT_EQ(WideToUTF16(L"\x30B0"), | |
761 formatted.substr(parsed.ref.begin, parsed.ref.len)); | |
762 | |
763 // Unescape case. | |
// Same URL, but with UnescapeRule::NORMAL: the expected string shows the | |
// username, password, path and query as Unicode instead of %XX escapes. | |
764 formatted = FormatUrl( | |
765 GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/" | |
766 "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"), | |
767 "ja", kFormatUrlOmitNothing, UnescapeRule::NORMAL, &parsed, NULL, | |
768 NULL); | |
769 EXPECT_EQ(WideToUTF16(L"http://\x30B0:\x30FC@\x30B0\x30FC\x30B0\x30EB.jp:8080" | |
770 L"/\x30B0/?q=\x30B0#\x30B0"), formatted); | |
771 EXPECT_EQ(WideToUTF16(L"\x30B0"), | |
772 formatted.substr(parsed.username.begin, parsed.username.len)); | |
773 EXPECT_EQ(WideToUTF16(L"\x30FC"), | |
774 formatted.substr(parsed.password.begin, parsed.password.len)); | |
775 EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"), | |
776 formatted.substr(parsed.host.begin, parsed.host.len)); | |
777 EXPECT_EQ(WideToUTF16(L"8080"), | |
778 formatted.substr(parsed.port.begin, parsed.port.len)); | |
779 EXPECT_EQ(WideToUTF16(L"/\x30B0/"), | |
780 formatted.substr(parsed.path.begin, parsed.path.len)); | |
781 EXPECT_EQ(WideToUTF16(L"q=\x30B0"), | |
782 formatted.substr(parsed.query.begin, parsed.query.len)); | |
783 EXPECT_EQ(WideToUTF16(L"\x30B0"), | |
784 formatted.substr(parsed.ref.begin, parsed.ref.len)); | |
785 | |
786 // Omit_username_password + unescape case. | |
// With kFormatUrlOmitUsernamePassword the expected string has no credentials | |
// and both parsed.username and parsed.password must be invalid. | |
787 formatted = FormatUrl( | |
788 GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/" | |
789 "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"), | |
790 "ja", kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, &parsed, | |
791 NULL, NULL); | |
792 EXPECT_EQ(WideToUTF16(L"http://\x30B0\x30FC\x30B0\x30EB.jp:8080" | |
793 L"/\x30B0/?q=\x30B0#\x30B0"), formatted); | |
794 EXPECT_FALSE(parsed.username.is_valid()); | |
795 EXPECT_FALSE(parsed.password.is_valid()); | |
796 EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"), | |
797 formatted.substr(parsed.host.begin, parsed.host.len)); | |
798 EXPECT_EQ(WideToUTF16(L"8080"), | |
799 formatted.substr(parsed.port.begin, parsed.port.len)); | |
800 EXPECT_EQ(WideToUTF16(L"/\x30B0/"), | |
801 formatted.substr(parsed.path.begin, parsed.path.len)); | |
802 EXPECT_EQ(WideToUTF16(L"q=\x30B0"), | |
803 formatted.substr(parsed.query.begin, parsed.query.len)); | |
804 EXPECT_EQ(WideToUTF16(L"\x30B0"), | |
805 formatted.substr(parsed.ref.begin, parsed.ref.len)); | |
806 | |
807 // View-source case. | |
// The expected scheme component is the whole "view-source:http" prefix, and | |
// the credentials are again omitted from the output. | |
808 formatted = | |
809 FormatUrl(GURL("view-source:http://user:passwd@host:81/path?query#ref"), | |
810 std::string(), | |
811 kFormatUrlOmitUsernamePassword, | |
812 UnescapeRule::NORMAL, | |
813 &parsed, | |
814 NULL, | |
815 NULL); | |
816 EXPECT_EQ(WideToUTF16(L"view-source:http://host:81/path?query#ref"), | |
817 formatted); | |
818 EXPECT_EQ(WideToUTF16(L"view-source:http"), | |
819 formatted.substr(parsed.scheme.begin, parsed.scheme.len)); | |
820 EXPECT_FALSE(parsed.username.is_valid()); | |
821 EXPECT_FALSE(parsed.password.is_valid()); | |
822 EXPECT_EQ(WideToUTF16(L"host"), | |
823 formatted.substr(parsed.host.begin, parsed.host.len)); | |
824 EXPECT_EQ(WideToUTF16(L"81"), | |
825 formatted.substr(parsed.port.begin, parsed.port.len)); | |
826 EXPECT_EQ(WideToUTF16(L"/path"), | |
827 formatted.substr(parsed.path.begin, parsed.path.len)); | |
828 EXPECT_EQ(WideToUTF16(L"query"), | |
829 formatted.substr(parsed.query.begin, parsed.query.len)); | |
830 EXPECT_EQ(WideToUTF16(L"ref"), | |
831 formatted.substr(parsed.ref.begin, parsed.ref.len)); | |
832 | |
833 // omit http case. | |
// With kFormatUrlOmitHTTP the expected string has no "http://" prefix, so | |
// the scheme component must be invalid. | |
834 formatted = FormatUrl(GURL("http://host:8000/a?b=c#d"), | |
835 std::string(), | |
836 kFormatUrlOmitHTTP, | |
837 UnescapeRule::NORMAL, | |
838 &parsed, | |
839 NULL, | |
840 NULL); | |
841 EXPECT_EQ(WideToUTF16(L"host:8000/a?b=c#d"), formatted); | |
842 EXPECT_FALSE(parsed.scheme.is_valid()); | |
843 EXPECT_FALSE(parsed.username.is_valid()); | |
844 EXPECT_FALSE(parsed.password.is_valid()); | |
845 EXPECT_EQ(WideToUTF16(L"host"), | |
846 formatted.substr(parsed.host.begin, parsed.host.len)); | |
847 EXPECT_EQ(WideToUTF16(L"8000"), | |
848 formatted.substr(parsed.port.begin, parsed.port.len)); | |
849 EXPECT_EQ(WideToUTF16(L"/a"), | |
850 formatted.substr(parsed.path.begin, parsed.path.len)); | |
851 EXPECT_EQ(WideToUTF16(L"b=c"), | |
852 formatted.substr(parsed.query.begin, parsed.query.len)); | |
853 EXPECT_EQ(WideToUTF16(L"d"), | |
854 formatted.substr(parsed.ref.begin, parsed.ref.len)); | |
855 | |
856 // omit http starts with ftp case. | |
// The host starts with "ftp.", and the expected string still begins with | |
// "http://" even though kFormatUrlOmitHTTP is set. | |
857 formatted = FormatUrl(GURL("http://ftp.host:8000/a?b=c#d"), | |
858 std::string(), | |
859 kFormatUrlOmitHTTP, | |
860 UnescapeRule::NORMAL, | |
861 &parsed, | |
862 NULL, | |
863 NULL); | |
864 EXPECT_EQ(WideToUTF16(L"http://ftp.host:8000/a?b=c#d"), formatted); | |
865 EXPECT_TRUE(parsed.scheme.is_valid()); | |
866 EXPECT_FALSE(parsed.username.is_valid()); | |
867 EXPECT_FALSE(parsed.password.is_valid()); | |
868 EXPECT_EQ(WideToUTF16(L"http"), | |
869 formatted.substr(parsed.scheme.begin, parsed.scheme.len)); | |
870 EXPECT_EQ(WideToUTF16(L"ftp.host"), | |
871 formatted.substr(parsed.host.begin, parsed.host.len)); | |
872 EXPECT_EQ(WideToUTF16(L"8000"), | |
873 formatted.substr(parsed.port.begin, parsed.port.len)); | |
874 EXPECT_EQ(WideToUTF16(L"/a"), | |
875 formatted.substr(parsed.path.begin, parsed.path.len)); | |
876 EXPECT_EQ(WideToUTF16(L"b=c"), | |
877 formatted.substr(parsed.query.begin, parsed.query.len)); | |
878 EXPECT_EQ(WideToUTF16(L"d"), | |
879 formatted.substr(parsed.ref.begin, parsed.ref.len)); | |
880 | |
881 // omit http starts with 'f' case. | |
// Here the host is the single letter "f" and "http://" is expected to be | |
// stripped; only the host and path components are expected in the output. | |
882 formatted = FormatUrl(GURL("http://f/"), | |
883 std::string(), | |
884 kFormatUrlOmitHTTP, | |
885 UnescapeRule::NORMAL, | |
886 &parsed, | |
887 NULL, | |
888 NULL); | |
889 EXPECT_EQ(WideToUTF16(L"f/"), formatted); | |
890 EXPECT_FALSE(parsed.scheme.is_valid()); | |
891 EXPECT_FALSE(parsed.username.is_valid()); | |
892 EXPECT_FALSE(parsed.password.is_valid()); | |
893 EXPECT_FALSE(parsed.port.is_valid()); | |
894 EXPECT_TRUE(parsed.path.is_valid()); | |
895 EXPECT_FALSE(parsed.query.is_valid()); | |
896 EXPECT_FALSE(parsed.ref.is_valid()); | |
897 EXPECT_EQ(WideToUTF16(L"f"), | |
898 formatted.substr(parsed.host.begin, parsed.host.len)); | |
899 EXPECT_EQ(WideToUTF16(L"/"), | |
900 formatted.substr(parsed.path.begin, parsed.path.len)); | |
901 } | |
902 | |
903 // Make sure that calling FormatUrl on a GURL and then converting back to a GURL | |
904 // results in the original GURL, for each ASCII character in the path. | |
// The loop covers character codes 32 through 127; each one is appended | |
// unescaped to "http://www.google.com/" and the specs are compared. | |
// NOTE(review): prefix_len is requested from FormatUrl() but never checked. | |
905 TEST(NetUtilTest, FormatUrlRoundTripPathASCII) { | |
906 for (unsigned char test_char = 32; test_char < 128; ++test_char) { | |
907 GURL url(std::string("http://www.google.com/") + | |
908 static_cast<char>(test_char)); | |
909 size_t prefix_len; | |
910 base::string16 formatted = FormatUrl(url, | |
911 std::string(), | |
912 kFormatUrlOmitUsernamePassword, | |
913 UnescapeRule::NORMAL, | |
914 NULL, | |
915 &prefix_len, | |
916 NULL); | |
917 EXPECT_EQ(url.spec(), GURL(formatted).spec()); | |
918 } | |
919 } | |
920 | |
921 // Make sure that calling FormatUrl on a GURL and then converting back to a GURL | |
922 // results in the original GURL, for each escaped ASCII character in the path. | |
// The loop covers character codes 32 through 127; each one is appended to the | |
// path as '%' followed by the base::HexEncode() of that single byte. | |
// NOTE(review): prefix_len is requested from FormatUrl() but never checked. | |
923 TEST(NetUtilTest, FormatUrlRoundTripPathEscaped) { | |
924 for (unsigned char test_char = 32; test_char < 128; ++test_char) { | |
925 std::string original_url("http://www.google.com/"); | |
926 original_url.push_back('%'); | |
927 original_url.append(base::HexEncode(&test_char, 1)); | |
928 | |
929 GURL url(original_url); | |
930 size_t prefix_len; | |
931 base::string16 formatted = FormatUrl(url, | |
932 std::string(), | |
933 kFormatUrlOmitUsernamePassword, | |
934 UnescapeRule::NORMAL, | |
935 NULL, | |
936 &prefix_len, | |
937 NULL); | |
938 EXPECT_EQ(url.spec(), GURL(formatted).spec()); | |
939 } | |
940 } | |
941 | |
942 // Make sure that calling FormatUrl on a GURL and then converting back to a GURL | |
943 // results in the original GURL, for each ASCII character in the query. | |
// The loop covers character codes 32 through 127; each one is appended | |
// unescaped right after the '?' of "http://www.google.com/?". | |
// NOTE(review): prefix_len is requested from FormatUrl() but never checked. | |
944 TEST(NetUtilTest, FormatUrlRoundTripQueryASCII) { | |
945 for (unsigned char test_char = 32; test_char < 128; ++test_char) { | |
946 GURL url(std::string("http://www.google.com/?") + | |
947 static_cast<char>(test_char)); | |
948 size_t prefix_len; | |
949 base::string16 formatted = FormatUrl(url, | |
950 std::string(), | |
951 kFormatUrlOmitUsernamePassword, | |
952 UnescapeRule::NORMAL, | |
953 NULL, | |
954 &prefix_len, | |
955 NULL); | |
956 EXPECT_EQ(url.spec(), GURL(formatted).spec()); | |
957 } | |
958 } | |
959 | |
960 // Make sure that calling FormatUrl on a GURL and then converting back to a GURL | |
961 // only results in a different GURL for certain characters. | |
// Unlike the other round-trip tests, the loop here starts at character code 0. | |
// Each code is placed in the query as '%' plus its base::HexEncode() form. | |
// NOTE(review): prefix_len is requested from FormatUrl() but never checked. | |
962 TEST(NetUtilTest, FormatUrlRoundTripQueryEscaped) { | |
963 // A full list of characters which FormatUrl should unescape and GURL should | |
964 // not escape again, when they appear in a query string. | |
965 const char kUnescapedCharacters[] = | |
966 "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz-_~"; | |
967 for (unsigned char test_char = 0; test_char < 128; ++test_char) { | |
968 std::string original_url("http://www.google.com/?"); | |
969 original_url.push_back('%'); | |
970 original_url.append(base::HexEncode(&test_char, 1)); | |
971 | |
972 GURL url(original_url); | |
973 size_t prefix_len; | |
974 base::string16 formatted = FormatUrl(url, | |
975 std::string(), | |
976 kFormatUrlOmitUsernamePassword, | |
977 UnescapeRule::NORMAL, | |
978 NULL, | |
979 &prefix_len, | |
980 NULL); | |
981 | |
// strchr() also matches the string's terminating NUL, so character code 0 is | |
// excluded explicitly. Characters in the list must NOT round-trip to the same | |
// spec (the escape is gone after formatting); every other one must. | |
982 if (test_char && | |
983 strchr(kUnescapedCharacters, static_cast<char>(test_char))) { | |
984 EXPECT_NE(url.spec(), GURL(formatted).spec()); | |
985 } else { | |
986 EXPECT_EQ(url.spec(), GURL(formatted).spec()); | |
987 } | |
988 } | |
989 } | |
990 | |
// Feeds CheckAdjustedOffsets() a series of URLs together with the format flags | |
// and unescape rule to apply and an array of expected offsets. The cases cover | |
// omitted credentials, omitted "http://", view-source URLs, punycode hosts | |
// converted to Unicode, and unescaped path and ref text. | |
// NOTE(review): presumably element i of each array is the expected offset in | |
// the formatted string for offset i of the input URL, with kNpos meaning that | |
// offset has no counterpart; confirm against CheckAdjustedOffsets(). | |
991 TEST(NetUtilTest, FormatUrlWithOffsets) { | |
// Empty URL, with NULL passed in place of an expected-offsets array. | |
992 CheckAdjustedOffsets(std::string(), "en", kFormatUrlOmitNothing, | |
993 UnescapeRule::NORMAL, NULL); | |
994 | |
995 const size_t basic_offsets[] = { | |
996 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, | |
997 21, 22, 23, 24, 25 | |
998 }; | |
999 CheckAdjustedOffsets("http://www.google.com/foo/", "en", | |
1000 kFormatUrlOmitNothing, UnescapeRule::NORMAL, | |
1001 basic_offsets); | |
1002 | |
1003 const size_t omit_auth_offsets_1[] = { | |
1004 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 7, | |
1005 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21 | |
1006 }; | |
1007 CheckAdjustedOffsets("http://foo:bar@www.google.com/", "en", | |
1008 kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, | |
1009 omit_auth_offsets_1); | |
1010 | |
1011 const size_t omit_auth_offsets_2[] = { | |
1012 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, 7, 8, 9, 10, 11, 12, 13, 14, | |
1013 15, 16, 17, 18, 19, 20, 21 | |
1014 }; | |
1015 CheckAdjustedOffsets("http://foo@www.google.com/", "en", | |
1016 kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, | |
1017 omit_auth_offsets_2); | |
1018 | |
1019 const size_t dont_omit_auth_offsets[] = { | |
1020 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, | |
1021 kNpos, kNpos, 11, 12, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, | |
1022 kNpos, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, | |
1023 30, 31 | |
1024 }; | |
1025 // Unescape to "http://foo\x30B0:\x30B0bar@www.google.com". | |
1026 CheckAdjustedOffsets("http://foo%E3%82%B0:%E3%82%B0bar@www.google.com/", "en", | |
1027 kFormatUrlOmitNothing, UnescapeRule::NORMAL, | |
1028 dont_omit_auth_offsets); | |
1029 | |
1030 const size_t view_source_offsets[] = { | |
1031 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, kNpos, | |
1032 kNpos, kNpos, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33 | |
1033 }; | |
1034 CheckAdjustedOffsets("view-source:http://foo@www.google.com/", "en", | |
1035 kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, | |
1036 view_source_offsets); | |
1037 | |
1038 const size_t idn_hostname_offsets_1[] = { | |
1039 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, | |
1040 kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 12, | |
1041 13, 14, 15, 16, 17, 18, 19 | |
1042 }; | |
1043 // Convert punycode to "http://\x671d\x65e5\x3042\x3055\x3072.jp/foo/". | |
1044 CheckAdjustedOffsets("http://xn--l8jvb1ey91xtjb.jp/foo/", "ja", | |
1045 kFormatUrlOmitNothing, UnescapeRule::NORMAL, | |
1046 idn_hostname_offsets_1); | |
1047 | |
1048 const size_t idn_hostname_offsets_2[] = { | |
1049 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, kNpos, kNpos, kNpos, kNpos, kNpos, | |
1050 kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 14, 15, kNpos, kNpos, kNpos, | |
1051 kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, | |
1052 kNpos, 19, 20, 21, 22, 23, 24 | |
1053 }; | |
1054 // Convert punycode to | |
1055 // "http://test.\x89c6\x9891.\x5317\x4eac\x5927\x5b78.test/". | |
1056 CheckAdjustedOffsets("http://test.xn--cy2a840a.xn--1lq90ic7f1rc.test/", | |
1057 "zh-CN", kFormatUrlOmitNothing, UnescapeRule::NORMAL, | |
1058 idn_hostname_offsets_2); | |
1059 | |
1060 const size_t unescape_offsets[] = { | |
1061 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, | |
1062 21, 22, 23, 24, 25, kNpos, kNpos, 26, 27, 28, 29, 30, kNpos, kNpos, kNpos, | |
1063 kNpos, kNpos, kNpos, kNpos, kNpos, 31, kNpos, kNpos, kNpos, kNpos, kNpos, | |
1064 kNpos, kNpos, kNpos, 32, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, | |
1065 kNpos, 33, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos | |
1066 }; | |
1067 // Unescape to "http://www.google.com/foo bar/\x30B0\x30FC\x30B0\x30EB". | |
1068 CheckAdjustedOffsets( | |
1069 "http://www.google.com/foo%20bar/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", | |
1070 "en", kFormatUrlOmitNothing, UnescapeRule::SPACES, unescape_offsets); | |
1071 | |
1072 const size_t ref_offsets[] = { | |
1073 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, | |
1074 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, kNpos, kNpos, 32, kNpos, kNpos, | |
1075 33 | |
1076 }; | |
1077 // Unescape to "http://www.google.com/foo.html#\x30B0\x30B0z". | |
1078 CheckAdjustedOffsets( | |
1079 "http://www.google.com/foo.html#\xE3\x82\xB0\xE3\x82\xB0z", "en", | |
1080 kFormatUrlOmitNothing, UnescapeRule::NORMAL, ref_offsets); | |
1081 | |
1082 const size_t omit_http_offsets[] = { | |
1083 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, | |
1084 10, 11, 12, 13, 14 | |
1085 }; | |
1086 CheckAdjustedOffsets("http://www.google.com/", "en", kFormatUrlOmitHTTP, | |
1087 UnescapeRule::NORMAL, omit_http_offsets); | |
1088 | |
1089 const size_t omit_http_start_with_ftp_offsets[] = { | |
1090 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21 | |
1091 }; | |
1092 CheckAdjustedOffsets("http://ftp.google.com/", "en", kFormatUrlOmitHTTP, | |
1093 UnescapeRule::NORMAL, omit_http_start_with_ftp_offsets); | |
1094 | |
1095 const size_t omit_all_offsets[] = { | |
1096 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, kNpos, kNpos, kNpos, kNpos, | |
1097 0, 1, 2, 3, 4, 5, 6, 7 | |
1098 }; | |
1099 CheckAdjustedOffsets("http://user@foo.com/", "en", kFormatUrlOmitAll, | |
1100 UnescapeRule::NORMAL, omit_all_offsets); | |
1101 } | |
1102 | 80 |
1103 } // namespace net | 81 } // namespace net |
OLD | NEW |