OLD | NEW |
| (Empty) |
1 // Copyright 2014 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "net/base/net_util.h" | |
6 | |
7 #include <string.h> | |
8 | |
9 #include <vector> | |
10 | |
11 #include "base/format_macros.h" | |
12 #include "base/strings/string_number_conversions.h" | |
13 #include "base/strings/utf_string_conversions.h" | |
14 #include "base/time/time.h" | |
15 #include "testing/gtest/include/gtest/gtest.h" | |
16 #include "url/gurl.h" | |
17 | |
18 using base::ASCIIToUTF16; | |
19 using base::WideToUTF16; | |
20 | |
21 namespace net { | |
22 | |
23 namespace { | |
24 | |
25 const size_t kNpos = base::string16::npos; | |
26 | |
// Accept-language lists exercised by the IDN tests. Each IDNTestCase carries
// one |unicode_allowed| flag per entry, in this order, and the
// IDNToUnicode{Fast,Slow} tests pick entries by index, so the order of this
// table must stay stable.
const char* const kLanguages[] = {
    "",          //  0
    "en",        //  1
    "zh-CN",     //  2
    "ja",        //  3
    "ko",        //  4
    "he",        //  5
    "ar",        //  6
    "ru",        //  7
    "el",        //  8
    "fr",        //  9
    "de",        // 10
    "pt",        // 11
    "sv",        // 12
    "th",        // 13
    "hi",        // 14
    "de,en",     // 15
    "el,en",     // 16
    "zh-TW,en",  // 17
    "ko,ja",     // 18
    "he,ru,en",  // 19
    "zh,ru,en"   // 20
};
34 | |
35 struct IDNTestCase { | |
36 const char* const input; | |
37 const wchar_t* unicode_output; | |
38 const bool unicode_allowed[arraysize(kLanguages)]; | |
39 }; | |
40 | |
41 // TODO(jungshik) This is just a random sample of languages and is far | |
42 // from exhaustive. We may have to generate all the combinations | |
43 // of languages (powerset of a set of all the languages). | |
44 const IDNTestCase idn_cases[] = { | |
45 // No IDN | |
46 {"www.google.com", L"www.google.com", | |
47 {true, true, true, true, true, | |
48 true, true, true, true, true, | |
49 true, true, true, true, true, | |
50 true, true, true, true, true, | |
51 true}}, | |
52 {"www.google.com.", L"www.google.com.", | |
53 {true, true, true, true, true, | |
54 true, true, true, true, true, | |
55 true, true, true, true, true, | |
56 true, true, true, true, true, | |
57 true}}, | |
58 {".", L".", | |
59 {true, true, true, true, true, | |
60 true, true, true, true, true, | |
61 true, true, true, true, true, | |
62 true, true, true, true, true, | |
63 true}}, | |
64 {"", L"", | |
65 {true, true, true, true, true, | |
66 true, true, true, true, true, | |
67 true, true, true, true, true, | |
68 true, true, true, true, true, | |
69 true}}, | |
70 // IDN | |
71 // Hanzi (Traditional Chinese) | |
72 {"xn--1lq90ic7f1rc.cn", L"\x5317\x4eac\x5927\x5b78.cn", | |
73 {true, false, true, true, false, | |
74 false, false, false, false, false, | |
75 false, false, false, false, false, | |
76 false, false, true, true, false, | |
77 true}}, | |
78 // Hanzi ('video' in Simplified Chinese : will pass only in zh-CN,zh) | |
79 {"xn--cy2a840a.com", L"\x89c6\x9891.com", | |
80 {true, false, true, false, false, | |
81 false, false, false, false, false, | |
82 false, false, false, false, false, | |
83 false, false, false, false, false, | |
84 true}}, | |
85 // Hanzi + '123' | |
86 {"www.xn--123-p18d.com", L"www.\x4e00" L"123.com", | |
87 {true, false, true, true, false, | |
88 false, false, false, false, false, | |
89 false, false, false, false, false, | |
90 false, false, true, true, false, | |
91 true}}, | |
92 // Hanzi + Latin : U+56FD is simplified and is regarded | |
93 // as not supported in zh-TW. | |
94 {"www.xn--hello-9n1hm04c.com", L"www.hello\x4e2d\x56fd.com", | |
95 {false, false, true, true, false, | |
96 false, false, false, false, false, | |
97 false, false, false, false, false, | |
98 false, false, false, true, false, | |
99 true}}, | |
100 // Kanji + Kana (Japanese) | |
101 {"xn--l8jvb1ey91xtjb.jp", L"\x671d\x65e5\x3042\x3055\x3072.jp", | |
102 {true, false, false, true, false, | |
103 false, false, false, false, false, | |
104 false, false, false, false, false, | |
105 false, false, false, true, false, | |
106 false}}, | |
107 // Katakana including U+30FC | |
108 {"xn--tckm4i2e.jp", L"\x30b3\x30de\x30fc\x30b9.jp", | |
109 {true, false, false, true, false, | |
110 false, false, false, false, false, | |
111 false, false, false, false, false, | |
112 false, false, false, true, false, | |
113 }}, | |
114 {"xn--3ck7a7g.jp", L"\u30ce\u30f3\u30bd.jp", | |
115 {true, false, false, true, false, | |
116 false, false, false, false, false, | |
117 false, false, false, false, false, | |
118 false, false, false, true, false, | |
119 }}, | |
120 // Katakana + Latin (Japanese) | |
121 // TODO(jungshik): Change 'false' in the first element to 'true' | |
122 // after upgrading to ICU 4.2.1 to use new uspoof_* APIs instead | |
123 // of our IsIDNComponentInSingleScript(). | |
124 {"xn--e-efusa1mzf.jp", L"e\x30b3\x30de\x30fc\x30b9.jp", | |
125 {false, false, false, true, false, | |
126 false, false, false, false, false, | |
127 false, false, false, false, false, | |
128 false, false, false, true, false, | |
129 }}, | |
130 {"xn--3bkxe.jp", L"\x30c8\x309a.jp", | |
131 {false, false, false, true, false, | |
132 false, false, false, false, false, | |
133 false, false, false, false, false, | |
134 false, false, false, true, false, | |
135 }}, | |
136 // Hangul (Korean) | |
137 {"www.xn--or3b17p6jjc.kr", L"www.\xc804\xc790\xc815\xbd80.kr", | |
138 {true, false, false, false, true, | |
139 false, false, false, false, false, | |
140 false, false, false, false, false, | |
141 false, false, false, true, false, | |
142 false}}, | |
143 // b<u-umlaut>cher (German) | |
144 {"xn--bcher-kva.de", L"b\x00fc" L"cher.de", | |
145 {true, false, false, false, false, | |
146 false, false, false, false, true, | |
147 true, false, false, false, false, | |
148 true, false, false, false, false, | |
149 false}}, | |
150 // a with diaeresis | |
151 {"www.xn--frgbolaget-q5a.se", L"www.f\x00e4rgbolaget.se", | |
152 {true, false, false, false, false, | |
153 false, false, false, false, false, | |
154 true, false, true, false, false, | |
155 true, false, false, false, false, | |
156 false}}, | |
157 // c-cedilla (French) | |
158 {"www.xn--alliancefranaise-npb.fr", L"www.alliancefran\x00e7" L"aise.fr", | |
159 {true, false, false, false, false, | |
160 false, false, false, false, true, | |
161 false, true, false, false, false, | |
162 false, false, false, false, false, | |
163 false}}, | |
164 // caf<e with acute accent> (French) | |
165 {"xn--caf-dma.fr", L"caf\x00e9.fr", | |
166 {true, false, false, false, false, | |
167 false, false, false, false, true, | |
168 false, true, true, false, false, | |
169 false, false, false, false, false, | |
170 false}}, | |
171 // c-cedilla and a with tilde (Portuguese) | |
172 {"xn--poema-9qae5a.com.br", L"p\x00e3oema\x00e7\x00e3.com.br", | |
173 {true, false, false, false, false, | |
174 false, false, false, false, false, | |
175 false, true, false, false, false, | |
176 false, false, false, false, false, | |
177 false}}, | |
178 // s with caron | |
179 {"xn--achy-f6a.com", L"\x0161" L"achy.com", | |
180 {true, false, false, false, false, | |
181 false, false, false, false, false, | |
182 false, false, false, false, false, | |
183 false, false, false, false, false, | |
184 false}}, | |
185 // TODO(jungshik) : Add examples with Cyrillic letters | |
186 // only used in some languages written in Cyrillic. | |
187 // Eutopia (Greek) | |
188 {"xn--kxae4bafwg.gr", L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1.gr", | |
189 {true, false, false, false, false, | |
190 false, false, false, true, false, | |
191 false, false, false, false, false, | |
192 false, true, false, false, false, | |
193 false}}, | |
194 // Eutopia + 123 (Greek) | |
195 {"xn---123-pldm0haj2bk.gr", | |
196 L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1-123.gr", | |
197 {true, false, false, false, false, | |
198 false, false, false, true, false, | |
199 false, false, false, false, false, | |
200 false, true, false, false, false, | |
201 false}}, | |
202 // Cyrillic (Russian) | |
203 {"xn--n1aeec9b.ru", L"\x0442\x043e\x0440\x0442\x044b.ru", | |
204 {true, false, false, false, false, | |
205 false, false, true, false, false, | |
206 false, false, false, false, false, | |
207 false, false, false, false, true, | |
208 true}}, | |
209 // Cyrillic + 123 (Russian) | |
210 {"xn---123-45dmmc5f.ru", L"\x0442\x043e\x0440\x0442\x044b-123.ru", | |
211 {true, false, false, false, false, | |
212 false, false, true, false, false, | |
213 false, false, false, false, false, | |
214 false, false, false, false, true, | |
215 true}}, | |
216 // Arabic | |
217 {"xn--mgba1fmg.ar", L"\x0627\x0641\x0644\x0627\x0645.ar", | |
218 {true, false, false, false, false, | |
219 false, true, false, false, false, | |
220 false, false, false, false, false, | |
221 false, false, false, false, false, | |
222 false}}, | |
223 // Hebrew | |
224 {"xn--4dbib.he", L"\x05d5\x05d0\x05d4.he", | |
225 {true, false, false, false, false, | |
226 true, false, false, false, false, | |
227 false, false, false, false, false, | |
228 false, false, false, false, true, | |
229 false}}, | |
230 // Thai | |
231 {"xn--12c2cc4ag3b4ccu.th", | |
232 L"\x0e2a\x0e32\x0e22\x0e01\x0e32\x0e23\x0e1a\x0e34\x0e19.th", | |
233 {true, false, false, false, false, | |
234 false, false, false, false, false, | |
235 false, false, false, true, false, | |
236 false, false, false, false, false, | |
237 false}}, | |
238 // Devanagari (Hindi) | |
239 {"www.xn--l1b6a9e1b7c.in", L"www.\x0905\x0915\x094b\x0932\x093e.in", | |
240 {true, false, false, false, false, | |
241 false, false, false, false, false, | |
242 false, false, false, false, true, | |
243 false, false, false, false, false, | |
244 false}}, | |
245 // Invalid IDN | |
246 {"xn--hello?world.com", NULL, | |
247 {false, false, false, false, false, | |
248 false, false, false, false, false, | |
249 false, false, false, false, false, | |
250 false, false, false, false, false, | |
251 false}}, | |
252 // Unsafe IDNs | |
253 // "payp<alpha>l.com" | |
254 {"www.xn--paypl-g9d.com", L"payp\x03b1l.com", | |
255 {false, false, false, false, false, | |
256 false, false, false, false, false, | |
257 false, false, false, false, false, | |
258 false, false, false, false, false, | |
259 false}}, | |
260 // google.gr with Greek omicron and epsilon | |
261 {"xn--ggl-6xc1ca.gr", L"g\x03bf\x03bfgl\x03b5.gr", | |
262 {false, false, false, false, false, | |
263 false, false, false, false, false, | |
264 false, false, false, false, false, | |
265 false, false, false, false, false, | |
266 false}}, | |
267 // google.ru with Cyrillic o | |
268 {"xn--ggl-tdd6ba.ru", L"g\x043e\x043egl\x0435.ru", | |
269 {false, false, false, false, false, | |
270 false, false, false, false, false, | |
271 false, false, false, false, false, | |
272 false, false, false, false, false, | |
273 false}}, | |
274 // h<e with acute>llo<China in Han>.cn | |
275 {"xn--hllo-bpa7979ih5m.cn", L"h\x00e9llo\x4e2d\x56fd.cn", | |
276 {false, false, false, false, false, | |
277 false, false, false, false, false, | |
278 false, false, false, false, false, | |
279 false, false, false, false, false, | |
280 false}}, | |
281 // <Greek rho><Cyrillic a><Cyrillic u>.ru | |
282 {"xn--2xa6t2b.ru", L"\x03c1\x0430\x0443.ru", | |
283 {false, false, false, false, false, | |
284 false, false, false, false, false, | |
285 false, false, false, false, false, | |
286 false, false, false, false, false, | |
287 false}}, | |
288 // One that's really long that will force a buffer realloc | |
289 {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" | |
290 "aaaaaaa", | |
291 L"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" | |
292 L"aaaaaaaa", | |
293 {true, true, true, true, true, | |
294 true, true, true, true, true, | |
295 true, true, true, true, true, | |
296 true, true, true, true, true, | |
297 true}}, | |
298 // Test cases for characters we blacklisted although allowed in IDN. | |
299 // Embedded spaces will be turned to %20 in the display. | |
300 // TODO(jungshik): We need to have more cases. This is a typical | |
301 // data-driven trap. The following test cases need to be separated | |
302 // and tested only for a couple of languages. | |
303 {"xn--osd3820f24c.kr", L"\xac00\xb098\x115f.kr", | |
304 {false, false, false, false, false, | |
305 false, false, false, false, false, | |
306 false, false, false, false, false, | |
307 false, false, false, false, false, | |
308 false}}, | |
309 {"www.xn--google-ho0coa.com", L"www.\x2039google\x203a.com", | |
310 {false, false, false, false, false, | |
311 false, false, false, false, false, | |
312 false, false, false, false, false, | |
313 false, false, false, false, false, | |
314 }}, | |
315 {"google.xn--comabc-k8d", L"google.com\x0338" L"abc", | |
316 {false, false, false, false, false, | |
317 false, false, false, false, false, | |
318 false, false, false, false, false, | |
319 false, false, false, false, false, | |
320 }}, | |
321 {"google.xn--com-oh4ba.evil.jp", L"google.com\x309a\x309a.evil.jp", | |
322 {false, false, false, false, false, | |
323 false, false, false, false, false, | |
324 false, false, false, false, false, | |
325 false, false, false, false, false, | |
326 }}, | |
327 {"google.xn--comevil-v04f.jp", L"google.com\x30ce" L"evil.jp", | |
328 {false, false, false, false, false, | |
329 false, false, false, false, false, | |
330 false, false, false, false, false, | |
331 false, false, false, false, false, | |
332 }}, | |
333 #if 0 | |
334 // These two cases are special. We need a separate test. | |
335 // U+3000 and U+3002 are normalized to ASCII space and dot. | |
336 {"xn-- -kq6ay5z.cn", L"\x4e2d\x56fd\x3000.cn", | |
337 {false, false, true, false, false, | |
338 false, false, false, false, false, | |
339 false, false, false, false, false, | |
340 false, false, true, false, false, | |
341 true}}, | |
342 {"xn--fiqs8s.cn", L"\x4e2d\x56fd\x3002" L"cn", | |
343 {false, false, true, false, false, | |
344 false, false, false, false, false, | |
345 false, false, false, false, false, | |
346 false, false, true, false, false, | |
347 true}}, | |
348 #endif | |
349 }; | |
350 | |
// Pairs an offset into an input string with the offset expected after
// adjustment.
// NOTE(review): no user of this struct is visible in this part of the file;
// confirm it is still referenced before relying on this description.
struct AdjustOffsetCase {
  size_t input_offset;   // Offset before adjustment.
  size_t output_offset;  // Offset expected after adjustment.
};
355 | |
356 struct UrlTestData { | |
357 const char* const description; | |
358 const char* const input; | |
359 const char* const languages; | |
360 FormatUrlTypes format_types; | |
361 UnescapeRule::Type escape_rules; | |
362 const wchar_t* output; // Use |wchar_t| to handle Unicode constants easily. | |
363 size_t prefix_len; | |
364 }; | |
365 | |
366 // A helper for IDN*{Fast,Slow}. | |
367 // Append "::<language list>" to |expected| and |actual| to make it | |
368 // easy to tell which sub-case fails without debugging. | |
369 void AppendLanguagesToOutputs(const char* languages, | |
370 base::string16* expected, | |
371 base::string16* actual) { | |
372 base::string16 to_append = ASCIIToUTF16("::") + ASCIIToUTF16(languages); | |
373 expected->append(to_append); | |
374 actual->append(to_append); | |
375 } | |
376 | |
377 // A pair of helpers for the FormatUrlWithOffsets() test. | |
378 void VerboseExpect(size_t expected, | |
379 size_t actual, | |
380 const std::string& original_url, | |
381 size_t position, | |
382 const base::string16& formatted_url) { | |
383 EXPECT_EQ(expected, actual) << "Original URL: " << original_url | |
384 << " (at char " << position << ")\nFormatted URL: " << formatted_url; | |
385 } | |
386 | |
387 void CheckAdjustedOffsets(const std::string& url_string, | |
388 const std::string& languages, | |
389 FormatUrlTypes format_types, | |
390 UnescapeRule::Type unescape_rules, | |
391 const size_t* output_offsets) { | |
392 GURL url(url_string); | |
393 size_t url_length = url_string.length(); | |
394 std::vector<size_t> offsets; | |
395 for (size_t i = 0; i <= url_length + 1; ++i) | |
396 offsets.push_back(i); | |
397 offsets.push_back(500000); // Something larger than any input length. | |
398 offsets.push_back(std::string::npos); | |
399 base::string16 formatted_url = FormatUrlWithOffsets(url, languages, | |
400 format_types, unescape_rules, NULL, NULL, &offsets); | |
401 for (size_t i = 0; i < url_length; ++i) | |
402 VerboseExpect(output_offsets[i], offsets[i], url_string, i, formatted_url); | |
403 VerboseExpect(formatted_url.length(), offsets[url_length], url_string, | |
404 url_length, formatted_url); | |
405 VerboseExpect(base::string16::npos, offsets[url_length + 1], url_string, | |
406 500000, formatted_url); | |
407 VerboseExpect(base::string16::npos, offsets[url_length + 2], url_string, | |
408 std::string::npos, formatted_url); | |
409 } | |
410 | |
411 } // anonymous namespace | |
412 | |
413 TEST(NetUtilTest, IDNToUnicodeFast) { | |
414 for (size_t i = 0; i < arraysize(idn_cases); i++) { | |
415 for (size_t j = 0; j < arraysize(kLanguages); j++) { | |
416 // ja || zh-TW,en || ko,ja -> IDNToUnicodeSlow | |
417 if (j == 3 || j == 17 || j == 18) | |
418 continue; | |
419 base::string16 output(IDNToUnicode(idn_cases[i].input, kLanguages[j])); | |
420 base::string16 expected(idn_cases[i].unicode_allowed[j] ? | |
421 WideToUTF16(idn_cases[i].unicode_output) : | |
422 ASCIIToUTF16(idn_cases[i].input)); | |
423 AppendLanguagesToOutputs(kLanguages[j], &expected, &output); | |
424 EXPECT_EQ(expected, output); | |
425 } | |
426 } | |
427 } | |
428 | |
429 TEST(NetUtilTest, IDNToUnicodeSlow) { | |
430 for (size_t i = 0; i < arraysize(idn_cases); i++) { | |
431 for (size_t j = 0; j < arraysize(kLanguages); j++) { | |
432 // !(ja || zh-TW,en || ko,ja) -> IDNToUnicodeFast | |
433 if (!(j == 3 || j == 17 || j == 18)) | |
434 continue; | |
435 base::string16 output(IDNToUnicode(idn_cases[i].input, kLanguages[j])); | |
436 base::string16 expected(idn_cases[i].unicode_allowed[j] ? | |
437 WideToUTF16(idn_cases[i].unicode_output) : | |
438 ASCIIToUTF16(idn_cases[i].input)); | |
439 AppendLanguagesToOutputs(kLanguages[j], &expected, &output); | |
440 EXPECT_EQ(expected, output); | |
441 } | |
442 } | |
443 } | |
444 | |
445 TEST(NetUtilTest, StripWWW) { | |
446 EXPECT_EQ(base::string16(), StripWWW(base::string16())); | |
447 EXPECT_EQ(base::string16(), StripWWW(ASCIIToUTF16("www."))); | |
448 EXPECT_EQ(ASCIIToUTF16("blah"), StripWWW(ASCIIToUTF16("www.blah"))); | |
449 EXPECT_EQ(ASCIIToUTF16("blah"), StripWWW(ASCIIToUTF16("blah"))); | |
450 } | |
451 | |
452 // This is currently a windows specific function. | |
453 #if defined(OS_WIN) | |
454 namespace { | |
455 | |
456 struct GetDirectoryListingEntryCase { | |
457 const wchar_t* name; | |
458 const char* const raw_bytes; | |
459 bool is_dir; | |
460 int64 filesize; | |
461 base::Time time; | |
462 const char* const expected; | |
463 }; | |
464 | |
465 } // namespace | |
466 | |
467 TEST(NetUtilTest, GetDirectoryListingEntry) { | |
468 const GetDirectoryListingEntryCase test_cases[] = { | |
469 {L"Foo", | |
470 "", | |
471 false, | |
472 10000, | |
473 base::Time(), | |
474 "<script>addRow(\"Foo\",\"Foo\",0,\"9.8 kB\",\"\");</script>\n"}, | |
475 {L"quo\"tes", | |
476 "", | |
477 false, | |
478 10000, | |
479 base::Time(), | |
480 "<script>addRow(\"quo\\\"tes\",\"quo%22tes\",0,\"9.8 kB\",\"\");</script>" | |
481 "\n"}, | |
482 {L"quo\"tes", | |
483 "quo\"tes", | |
484 false, | |
485 10000, | |
486 base::Time(), | |
487 "<script>addRow(\"quo\\\"tes\",\"quo%22tes\",0,\"9.8 kB\",\"\");</script>" | |
488 "\n"}, | |
489 // U+D55C0 U+AE00. raw_bytes is empty (either a local file with | |
490 // UTF-8/UTF-16 encoding or a remote file on an ftp server using UTF-8 | |
491 {L"\xD55C\xAE00.txt", | |
492 "", | |
493 false, | |
494 10000, | |
495 base::Time(), | |
496 "<script>addRow(\"\xED\x95\x9C\xEA\xB8\x80.txt\"," | |
497 "\"%ED%95%9C%EA%B8%80.txt\",0,\"9.8 kB\",\"\");</script>\n"}, | |
498 // U+D55C0 U+AE00. raw_bytes is the corresponding EUC-KR sequence: | |
499 // a local or remote file in EUC-KR. | |
500 {L"\xD55C\xAE00.txt", | |
501 "\xC7\xD1\xB1\xDB.txt", | |
502 false, | |
503 10000, | |
504 base::Time(), | |
505 "<script>addRow(\"\xED\x95\x9C\xEA\xB8\x80.txt\",\"%C7%D1%B1%DB.txt\"" | |
506 ",0,\"9.8 kB\",\"\");</script>\n"}, | |
507 }; | |
508 | |
509 for (size_t i = 0; i < arraysize(test_cases); ++i) { | |
510 const std::string results = GetDirectoryListingEntry( | |
511 WideToUTF16(test_cases[i].name), | |
512 test_cases[i].raw_bytes, | |
513 test_cases[i].is_dir, | |
514 test_cases[i].filesize, | |
515 test_cases[i].time); | |
516 EXPECT_EQ(test_cases[i].expected, results); | |
517 } | |
518 } | |
519 | |
520 #endif | |
521 | |
522 TEST(NetUtilTest, FormatUrl) { | |
523 FormatUrlTypes default_format_type = kFormatUrlOmitUsernamePassword; | |
524 const UrlTestData tests[] = { | |
525 {"Empty URL", "", "", default_format_type, UnescapeRule::NORMAL, L"", 0}, | |
526 | |
527 {"Simple URL", | |
528 "http://www.google.com/", "", default_format_type, UnescapeRule::NORMAL, | |
529 L"http://www.google.com/", 7}, | |
530 | |
531 {"With a port number and a reference", | |
532 "http://www.google.com:8080/#\xE3\x82\xB0", "", default_format_type, | |
533 UnescapeRule::NORMAL, | |
534 L"http://www.google.com:8080/#\x30B0", 7}, | |
535 | |
536 // -------- IDN tests -------- | |
537 {"Japanese IDN with ja", | |
538 "http://xn--l8jvb1ey91xtjb.jp", "ja", default_format_type, | |
539 UnescapeRule::NORMAL, L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7}, | |
540 | |
541 {"Japanese IDN with en", | |
542 "http://xn--l8jvb1ey91xtjb.jp", "en", default_format_type, | |
543 UnescapeRule::NORMAL, L"http://xn--l8jvb1ey91xtjb.jp/", 7}, | |
544 | |
545 {"Japanese IDN without any languages", | |
546 "http://xn--l8jvb1ey91xtjb.jp", "", default_format_type, | |
547 UnescapeRule::NORMAL, | |
548 // Single script is safe for empty languages. | |
549 L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7}, | |
550 | |
551 {"mailto: with Japanese IDN", | |
552 "mailto:foo@xn--l8jvb1ey91xtjb.jp", "ja", default_format_type, | |
553 UnescapeRule::NORMAL, | |
554 // GURL doesn't assume an email address's domain part as a host name. | |
555 L"mailto:foo@xn--l8jvb1ey91xtjb.jp", 7}, | |
556 | |
557 {"file: with Japanese IDN", | |
558 "file://xn--l8jvb1ey91xtjb.jp/config.sys", "ja", default_format_type, | |
559 UnescapeRule::NORMAL, | |
560 L"file://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 7}, | |
561 | |
562 {"ftp: with Japanese IDN", | |
563 "ftp://xn--l8jvb1ey91xtjb.jp/config.sys", "ja", default_format_type, | |
564 UnescapeRule::NORMAL, | |
565 L"ftp://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 6}, | |
566 | |
567 // -------- omit_username_password flag tests -------- | |
568 {"With username and password, omit_username_password=false", | |
569 "http://user:passwd@example.com/foo", "", | |
570 kFormatUrlOmitNothing, UnescapeRule::NORMAL, | |
571 L"http://user:passwd@example.com/foo", 19}, | |
572 | |
573 {"With username and password, omit_username_password=true", | |
574 "http://user:passwd@example.com/foo", "", default_format_type, | |
575 UnescapeRule::NORMAL, L"http://example.com/foo", 7}, | |
576 | |
577 {"With username and no password", | |
578 "http://user@example.com/foo", "", default_format_type, | |
579 UnescapeRule::NORMAL, L"http://example.com/foo", 7}, | |
580 | |
581 {"Just '@' without username and password", | |
582 "http://@example.com/foo", "", default_format_type, UnescapeRule::NORMAL, | |
583 L"http://example.com/foo", 7}, | |
584 | |
585 // GURL doesn't think local-part of an email address is username for URL. | |
586 {"mailto:, omit_username_password=true", | |
587 "mailto:foo@example.com", "", default_format_type, UnescapeRule::NORMAL, | |
588 L"mailto:foo@example.com", 7}, | |
589 | |
590 // -------- unescape flag tests -------- | |
591 {"Do not unescape", | |
592 "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/" | |
593 "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB" | |
594 "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", "en", default_format_type, | |
595 UnescapeRule::NONE, | |
596 // GURL parses %-encoded hostnames into Punycode. | |
597 L"http://xn--qcka1pmc.jp/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB" | |
598 L"?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", 7}, | |
599 | |
600 {"Unescape normally", | |
601 "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/" | |
602 "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB" | |
603 "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", "en", default_format_type, | |
604 UnescapeRule::NORMAL, | |
605 L"http://xn--qcka1pmc.jp/\x30B0\x30FC\x30B0\x30EB" | |
606 L"?q=\x30B0\x30FC\x30B0\x30EB", 7}, | |
607 | |
608 {"Unescape normally with BiDi control character", | |
609 "http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy", "en", default_format_type, | |
610 UnescapeRule::NORMAL, L"http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy", 7}, | |
611 | |
612 {"Unescape normally including unescape spaces", | |
613 "http://www.google.com/search?q=Hello%20World", "en", default_format_type, | |
614 UnescapeRule::SPACES, L"http://www.google.com/search?q=Hello World", 7}, | |
615 | |
616 /* | |
617 {"unescape=true with some special characters", | |
618 "http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", "", | |
619 kFormatUrlOmitNothing, UnescapeRule::NORMAL, | |
620 L"http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", 25}, | |
621 */ | |
622 // Disabled: the resultant URL becomes "...user%253A:%2540passwd...". | |
623 | |
624 // -------- omit http: -------- | |
625 {"omit http with user name", | |
626 "http://user@example.com/foo", "", kFormatUrlOmitAll, | |
627 UnescapeRule::NORMAL, L"example.com/foo", 0}, | |
628 | |
629 {"omit http", | |
630 "http://www.google.com/", "en", kFormatUrlOmitHTTP, | |
631 UnescapeRule::NORMAL, L"www.google.com/", | |
632 0}, | |
633 | |
634 {"omit http with https", | |
635 "https://www.google.com/", "en", kFormatUrlOmitHTTP, | |
636 UnescapeRule::NORMAL, L"https://www.google.com/", | |
637 8}, | |
638 | |
639 {"omit http starts with ftp.", | |
640 "http://ftp.google.com/", "en", kFormatUrlOmitHTTP, | |
641 UnescapeRule::NORMAL, L"http://ftp.google.com/", | |
642 7}, | |
643 | |
644 // -------- omit trailing slash on bare hostname -------- | |
645 {"omit slash when it's the entire path", | |
646 "http://www.google.com/", "en", | |
647 kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL, | |
648 L"http://www.google.com", 7}, | |
649 {"omit slash when there's a ref", | |
650 "http://www.google.com/#ref", "en", | |
651 kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL, | |
652 L"http://www.google.com/#ref", 7}, | |
653 {"omit slash when there's a query", | |
654 "http://www.google.com/?", "en", | |
655 kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL, | |
656 L"http://www.google.com/?", 7}, | |
657 {"omit slash when it's not the entire path", | |
658 "http://www.google.com/foo", "en", | |
659 kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL, | |
660 L"http://www.google.com/foo", 7}, | |
661 {"omit slash for nonstandard URLs", | |
662 "data:/", "en", kFormatUrlOmitTrailingSlashOnBareHostname, | |
663 UnescapeRule::NORMAL, L"data:/", 5}, | |
664 {"omit slash for file URLs", | |
665 "file:///", "en", kFormatUrlOmitTrailingSlashOnBareHostname, | |
666 UnescapeRule::NORMAL, L"file:///", 7}, | |
667 | |
668 // -------- view-source: -------- | |
669 {"view-source", | |
670 "view-source:http://xn--qcka1pmc.jp/", "ja", default_format_type, | |
671 UnescapeRule::NORMAL, L"view-source:http://\x30B0\x30FC\x30B0\x30EB.jp/", | |
672 19}, | |
673 | |
674 {"view-source of view-source", | |
675 "view-source:view-source:http://xn--qcka1pmc.jp/", "ja", | |
676 default_format_type, UnescapeRule::NORMAL, | |
677 L"view-source:view-source:http://xn--qcka1pmc.jp/", 12}, | |
678 | |
679 // view-source should omit http and trailing slash where non-view-source | |
680 // would. | |
681 {"view-source omit http", | |
682 "view-source:http://a.b/c", "en", kFormatUrlOmitAll, | |
683 UnescapeRule::NORMAL, L"view-source:a.b/c", | |
684 12}, | |
685 {"view-source omit http starts with ftp.", | |
686 "view-source:http://ftp.b/c", "en", kFormatUrlOmitAll, | |
687 UnescapeRule::NORMAL, L"view-source:http://ftp.b/c", | |
688 19}, | |
689 {"view-source omit slash when it's the entire path", | |
690 "view-source:http://a.b/", "en", kFormatUrlOmitAll, | |
691 UnescapeRule::NORMAL, L"view-source:a.b", | |
692 12}, | |
693 }; | |
694 | |
695 for (size_t i = 0; i < arraysize(tests); ++i) { | |
696 size_t prefix_len; | |
697 base::string16 formatted = FormatUrl( | |
698 GURL(tests[i].input), tests[i].languages, tests[i].format_types, | |
699 tests[i].escape_rules, NULL, &prefix_len, NULL); | |
700 EXPECT_EQ(WideToUTF16(tests[i].output), formatted) << tests[i].description; | |
701 EXPECT_EQ(tests[i].prefix_len, prefix_len) << tests[i].description; | |
702 } | |
703 } | |
704 | |
705 TEST(NetUtilTest, FormatUrlParsed) { | |
706 // No unescape case. | |
707 url::Parsed parsed; | |
708 base::string16 formatted = FormatUrl( | |
709 GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/" | |
710 "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"), | |
711 "ja", kFormatUrlOmitNothing, UnescapeRule::NONE, &parsed, NULL, | |
712 NULL); | |
713 EXPECT_EQ(WideToUTF16( | |
714 L"http://%E3%82%B0:%E3%83%BC@\x30B0\x30FC\x30B0\x30EB.jp:8080" | |
715 L"/%E3%82%B0/?q=%E3%82%B0#\x30B0"), formatted); | |
716 EXPECT_EQ(WideToUTF16(L"%E3%82%B0"), | |
717 formatted.substr(parsed.username.begin, parsed.username.len)); | |
718 EXPECT_EQ(WideToUTF16(L"%E3%83%BC"), | |
719 formatted.substr(parsed.password.begin, parsed.password.len)); | |
720 EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"), | |
721 formatted.substr(parsed.host.begin, parsed.host.len)); | |
722 EXPECT_EQ(WideToUTF16(L"8080"), | |
723 formatted.substr(parsed.port.begin, parsed.port.len)); | |
724 EXPECT_EQ(WideToUTF16(L"/%E3%82%B0/"), | |
725 formatted.substr(parsed.path.begin, parsed.path.len)); | |
726 EXPECT_EQ(WideToUTF16(L"q=%E3%82%B0"), | |
727 formatted.substr(parsed.query.begin, parsed.query.len)); | |
728 EXPECT_EQ(WideToUTF16(L"\x30B0"), | |
729 formatted.substr(parsed.ref.begin, parsed.ref.len)); | |
730 | |
731 // Unescape case. | |
732 formatted = FormatUrl( | |
733 GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/" | |
734 "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"), | |
735 "ja", kFormatUrlOmitNothing, UnescapeRule::NORMAL, &parsed, NULL, | |
736 NULL); | |
737 EXPECT_EQ(WideToUTF16(L"http://\x30B0:\x30FC@\x30B0\x30FC\x30B0\x30EB.jp:8080" | |
738 L"/\x30B0/?q=\x30B0#\x30B0"), formatted); | |
739 EXPECT_EQ(WideToUTF16(L"\x30B0"), | |
740 formatted.substr(parsed.username.begin, parsed.username.len)); | |
741 EXPECT_EQ(WideToUTF16(L"\x30FC"), | |
742 formatted.substr(parsed.password.begin, parsed.password.len)); | |
743 EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"), | |
744 formatted.substr(parsed.host.begin, parsed.host.len)); | |
745 EXPECT_EQ(WideToUTF16(L"8080"), | |
746 formatted.substr(parsed.port.begin, parsed.port.len)); | |
747 EXPECT_EQ(WideToUTF16(L"/\x30B0/"), | |
748 formatted.substr(parsed.path.begin, parsed.path.len)); | |
749 EXPECT_EQ(WideToUTF16(L"q=\x30B0"), | |
750 formatted.substr(parsed.query.begin, parsed.query.len)); | |
751 EXPECT_EQ(WideToUTF16(L"\x30B0"), | |
752 formatted.substr(parsed.ref.begin, parsed.ref.len)); | |
753 | |
754 // Omit_username_password + unescape case. | |
755 formatted = FormatUrl( | |
756 GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/" | |
757 "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"), | |
758 "ja", kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, &parsed, | |
759 NULL, NULL); | |
760 EXPECT_EQ(WideToUTF16(L"http://\x30B0\x30FC\x30B0\x30EB.jp:8080" | |
761 L"/\x30B0/?q=\x30B0#\x30B0"), formatted); | |
762 EXPECT_FALSE(parsed.username.is_valid()); | |
763 EXPECT_FALSE(parsed.password.is_valid()); | |
764 EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"), | |
765 formatted.substr(parsed.host.begin, parsed.host.len)); | |
766 EXPECT_EQ(WideToUTF16(L"8080"), | |
767 formatted.substr(parsed.port.begin, parsed.port.len)); | |
768 EXPECT_EQ(WideToUTF16(L"/\x30B0/"), | |
769 formatted.substr(parsed.path.begin, parsed.path.len)); | |
770 EXPECT_EQ(WideToUTF16(L"q=\x30B0"), | |
771 formatted.substr(parsed.query.begin, parsed.query.len)); | |
772 EXPECT_EQ(WideToUTF16(L"\x30B0"), | |
773 formatted.substr(parsed.ref.begin, parsed.ref.len)); | |
774 | |
775 // View-source case. | |
776 formatted = | |
777 FormatUrl(GURL("view-source:http://user:passwd@host:81/path?query#ref"), | |
778 std::string(), | |
779 kFormatUrlOmitUsernamePassword, | |
780 UnescapeRule::NORMAL, | |
781 &parsed, | |
782 NULL, | |
783 NULL); | |
784 EXPECT_EQ(WideToUTF16(L"view-source:http://host:81/path?query#ref"), | |
785 formatted); | |
786 EXPECT_EQ(WideToUTF16(L"view-source:http"), | |
787 formatted.substr(parsed.scheme.begin, parsed.scheme.len)); | |
788 EXPECT_FALSE(parsed.username.is_valid()); | |
789 EXPECT_FALSE(parsed.password.is_valid()); | |
790 EXPECT_EQ(WideToUTF16(L"host"), | |
791 formatted.substr(parsed.host.begin, parsed.host.len)); | |
792 EXPECT_EQ(WideToUTF16(L"81"), | |
793 formatted.substr(parsed.port.begin, parsed.port.len)); | |
794 EXPECT_EQ(WideToUTF16(L"/path"), | |
795 formatted.substr(parsed.path.begin, parsed.path.len)); | |
796 EXPECT_EQ(WideToUTF16(L"query"), | |
797 formatted.substr(parsed.query.begin, parsed.query.len)); | |
798 EXPECT_EQ(WideToUTF16(L"ref"), | |
799 formatted.substr(parsed.ref.begin, parsed.ref.len)); | |
800 | |
801 // omit http case. | |
802 formatted = FormatUrl(GURL("http://host:8000/a?b=c#d"), | |
803 std::string(), | |
804 kFormatUrlOmitHTTP, | |
805 UnescapeRule::NORMAL, | |
806 &parsed, | |
807 NULL, | |
808 NULL); | |
809 EXPECT_EQ(WideToUTF16(L"host:8000/a?b=c#d"), formatted); | |
810 EXPECT_FALSE(parsed.scheme.is_valid()); | |
811 EXPECT_FALSE(parsed.username.is_valid()); | |
812 EXPECT_FALSE(parsed.password.is_valid()); | |
813 EXPECT_EQ(WideToUTF16(L"host"), | |
814 formatted.substr(parsed.host.begin, parsed.host.len)); | |
815 EXPECT_EQ(WideToUTF16(L"8000"), | |
816 formatted.substr(parsed.port.begin, parsed.port.len)); | |
817 EXPECT_EQ(WideToUTF16(L"/a"), | |
818 formatted.substr(parsed.path.begin, parsed.path.len)); | |
819 EXPECT_EQ(WideToUTF16(L"b=c"), | |
820 formatted.substr(parsed.query.begin, parsed.query.len)); | |
821 EXPECT_EQ(WideToUTF16(L"d"), | |
822 formatted.substr(parsed.ref.begin, parsed.ref.len)); | |
823 | |
824 // omit http starts with ftp case. | |
825 formatted = FormatUrl(GURL("http://ftp.host:8000/a?b=c#d"), | |
826 std::string(), | |
827 kFormatUrlOmitHTTP, | |
828 UnescapeRule::NORMAL, | |
829 &parsed, | |
830 NULL, | |
831 NULL); | |
832 EXPECT_EQ(WideToUTF16(L"http://ftp.host:8000/a?b=c#d"), formatted); | |
833 EXPECT_TRUE(parsed.scheme.is_valid()); | |
834 EXPECT_FALSE(parsed.username.is_valid()); | |
835 EXPECT_FALSE(parsed.password.is_valid()); | |
836 EXPECT_EQ(WideToUTF16(L"http"), | |
837 formatted.substr(parsed.scheme.begin, parsed.scheme.len)); | |
838 EXPECT_EQ(WideToUTF16(L"ftp.host"), | |
839 formatted.substr(parsed.host.begin, parsed.host.len)); | |
840 EXPECT_EQ(WideToUTF16(L"8000"), | |
841 formatted.substr(parsed.port.begin, parsed.port.len)); | |
842 EXPECT_EQ(WideToUTF16(L"/a"), | |
843 formatted.substr(parsed.path.begin, parsed.path.len)); | |
844 EXPECT_EQ(WideToUTF16(L"b=c"), | |
845 formatted.substr(parsed.query.begin, parsed.query.len)); | |
846 EXPECT_EQ(WideToUTF16(L"d"), | |
847 formatted.substr(parsed.ref.begin, parsed.ref.len)); | |
848 | |
849 // omit http starts with 'f' case. | |
850 formatted = FormatUrl(GURL("http://f/"), | |
851 std::string(), | |
852 kFormatUrlOmitHTTP, | |
853 UnescapeRule::NORMAL, | |
854 &parsed, | |
855 NULL, | |
856 NULL); | |
857 EXPECT_EQ(WideToUTF16(L"f/"), formatted); | |
858 EXPECT_FALSE(parsed.scheme.is_valid()); | |
859 EXPECT_FALSE(parsed.username.is_valid()); | |
860 EXPECT_FALSE(parsed.password.is_valid()); | |
861 EXPECT_FALSE(parsed.port.is_valid()); | |
862 EXPECT_TRUE(parsed.path.is_valid()); | |
863 EXPECT_FALSE(parsed.query.is_valid()); | |
864 EXPECT_FALSE(parsed.ref.is_valid()); | |
865 EXPECT_EQ(WideToUTF16(L"f"), | |
866 formatted.substr(parsed.host.begin, parsed.host.len)); | |
867 EXPECT_EQ(WideToUTF16(L"/"), | |
868 formatted.substr(parsed.path.begin, parsed.path.len)); | |
869 } | |
870 | |
871 // Make sure that calling FormatUrl on a GURL and then converting back to a GURL | |
872 // results in the original GURL, for each ASCII character in the path. | |
873 TEST(NetUtilTest, FormatUrlRoundTripPathASCII) { | |
874 for (unsigned char test_char = 32; test_char < 128; ++test_char) { | |
875 GURL url(std::string("http://www.google.com/") + | |
876 static_cast<char>(test_char)); | |
877 size_t prefix_len; | |
878 base::string16 formatted = FormatUrl(url, | |
879 std::string(), | |
880 kFormatUrlOmitUsernamePassword, | |
881 UnescapeRule::NORMAL, | |
882 NULL, | |
883 &prefix_len, | |
884 NULL); | |
885 EXPECT_EQ(url.spec(), GURL(formatted).spec()); | |
886 } | |
887 } | |
888 | |
889 // Make sure that calling FormatUrl on a GURL and then converting back to a GURL | |
890 // results in the original GURL, for each escaped ASCII character in the path. | |
891 TEST(NetUtilTest, FormatUrlRoundTripPathEscaped) { | |
892 for (unsigned char test_char = 32; test_char < 128; ++test_char) { | |
893 std::string original_url("http://www.google.com/"); | |
894 original_url.push_back('%'); | |
895 original_url.append(base::HexEncode(&test_char, 1)); | |
896 | |
897 GURL url(original_url); | |
898 size_t prefix_len; | |
899 base::string16 formatted = FormatUrl(url, | |
900 std::string(), | |
901 kFormatUrlOmitUsernamePassword, | |
902 UnescapeRule::NORMAL, | |
903 NULL, | |
904 &prefix_len, | |
905 NULL); | |
906 EXPECT_EQ(url.spec(), GURL(formatted).spec()); | |
907 } | |
908 } | |
909 | |
910 // Make sure that calling FormatUrl on a GURL and then converting back to a GURL | |
911 // results in the original GURL, for each ASCII character in the query. | |
912 TEST(NetUtilTest, FormatUrlRoundTripQueryASCII) { | |
913 for (unsigned char test_char = 32; test_char < 128; ++test_char) { | |
914 GURL url(std::string("http://www.google.com/?") + | |
915 static_cast<char>(test_char)); | |
916 size_t prefix_len; | |
917 base::string16 formatted = FormatUrl(url, | |
918 std::string(), | |
919 kFormatUrlOmitUsernamePassword, | |
920 UnescapeRule::NORMAL, | |
921 NULL, | |
922 &prefix_len, | |
923 NULL); | |
924 EXPECT_EQ(url.spec(), GURL(formatted).spec()); | |
925 } | |
926 } | |
927 | |
928 // Make sure that calling FormatUrl on a GURL and then converting back to a GURL | |
929 // only results in a different GURL for certain characters. | |
930 TEST(NetUtilTest, FormatUrlRoundTripQueryEscaped) { | |
931 // A full list of characters which FormatURL should unescape and GURL should | |
932 // not escape again, when they appear in a query string. | |
933 const char kUnescapedCharacters[] = | |
934 "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz-_~"; | |
935 for (unsigned char test_char = 0; test_char < 128; ++test_char) { | |
936 std::string original_url("http://www.google.com/?"); | |
937 original_url.push_back('%'); | |
938 original_url.append(base::HexEncode(&test_char, 1)); | |
939 | |
940 GURL url(original_url); | |
941 size_t prefix_len; | |
942 base::string16 formatted = FormatUrl(url, | |
943 std::string(), | |
944 kFormatUrlOmitUsernamePassword, | |
945 UnescapeRule::NORMAL, | |
946 NULL, | |
947 &prefix_len, | |
948 NULL); | |
949 | |
950 if (test_char && | |
951 strchr(kUnescapedCharacters, static_cast<char>(test_char))) { | |
952 EXPECT_NE(url.spec(), GURL(formatted).spec()); | |
953 } else { | |
954 EXPECT_EQ(url.spec(), GURL(formatted).spec()); | |
955 } | |
956 } | |
957 } | |
958 | |
959 TEST(NetUtilTest, FormatUrlWithOffsets) { | |
960 CheckAdjustedOffsets(std::string(), "en", kFormatUrlOmitNothing, | |
961 UnescapeRule::NORMAL, NULL); | |
962 | |
963 const size_t basic_offsets[] = { | |
964 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, | |
965 21, 22, 23, 24, 25 | |
966 }; | |
967 CheckAdjustedOffsets("http://www.google.com/foo/", "en", | |
968 kFormatUrlOmitNothing, UnescapeRule::NORMAL, | |
969 basic_offsets); | |
970 | |
971 const size_t omit_auth_offsets_1[] = { | |
972 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 7, | |
973 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21 | |
974 }; | |
975 CheckAdjustedOffsets("http://foo:bar@www.google.com/", "en", | |
976 kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, | |
977 omit_auth_offsets_1); | |
978 | |
979 const size_t omit_auth_offsets_2[] = { | |
980 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, 7, 8, 9, 10, 11, 12, 13, 14, | |
981 15, 16, 17, 18, 19, 20, 21 | |
982 }; | |
983 CheckAdjustedOffsets("http://foo@www.google.com/", "en", | |
984 kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, | |
985 omit_auth_offsets_2); | |
986 | |
987 const size_t dont_omit_auth_offsets[] = { | |
988 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, | |
989 kNpos, kNpos, 11, 12, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, | |
990 kNpos, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, | |
991 30, 31 | |
992 }; | |
993 // Unescape to "http://foo\x30B0:\x30B0bar@www.google.com". | |
994 CheckAdjustedOffsets("http://foo%E3%82%B0:%E3%82%B0bar@www.google.com/", "en", | |
995 kFormatUrlOmitNothing, UnescapeRule::NORMAL, | |
996 dont_omit_auth_offsets); | |
997 | |
998 const size_t view_source_offsets[] = { | |
999 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, kNpos, | |
1000 kNpos, kNpos, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33 | |
1001 }; | |
1002 CheckAdjustedOffsets("view-source:http://foo@www.google.com/", "en", | |
1003 kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, | |
1004 view_source_offsets); | |
1005 | |
1006 const size_t idn_hostname_offsets_1[] = { | |
1007 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, | |
1008 kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 12, | |
1009 13, 14, 15, 16, 17, 18, 19 | |
1010 }; | |
1011 // Convert punycode to "http://\x671d\x65e5\x3042\x3055\x3072.jp/foo/". | |
1012 CheckAdjustedOffsets("http://xn--l8jvb1ey91xtjb.jp/foo/", "ja", | |
1013 kFormatUrlOmitNothing, UnescapeRule::NORMAL, | |
1014 idn_hostname_offsets_1); | |
1015 | |
1016 const size_t idn_hostname_offsets_2[] = { | |
1017 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, kNpos, kNpos, kNpos, kNpos, kNpos, | |
1018 kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 14, 15, kNpos, kNpos, kNpos, | |
1019 kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, | |
1020 kNpos, 19, 20, 21, 22, 23, 24 | |
1021 }; | |
1022 // Convert punycode to | |
1023 // "http://test.\x89c6\x9891.\x5317\x4eac\x5927\x5b78.test/". | |
1024 CheckAdjustedOffsets("http://test.xn--cy2a840a.xn--1lq90ic7f1rc.test/", | |
1025 "zh-CN", kFormatUrlOmitNothing, UnescapeRule::NORMAL, | |
1026 idn_hostname_offsets_2); | |
1027 | |
1028 const size_t unescape_offsets[] = { | |
1029 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, | |
1030 21, 22, 23, 24, 25, kNpos, kNpos, 26, 27, 28, 29, 30, kNpos, kNpos, kNpos, | |
1031 kNpos, kNpos, kNpos, kNpos, kNpos, 31, kNpos, kNpos, kNpos, kNpos, kNpos, | |
1032 kNpos, kNpos, kNpos, 32, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, | |
1033 kNpos, 33, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos | |
1034 }; | |
1035 // Unescape to "http://www.google.com/foo bar/\x30B0\x30FC\x30B0\x30EB". | |
1036 CheckAdjustedOffsets( | |
1037 "http://www.google.com/foo%20bar/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", | |
1038 "en", kFormatUrlOmitNothing, UnescapeRule::SPACES, unescape_offsets); | |
1039 | |
1040 const size_t ref_offsets[] = { | |
1041 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, | |
1042 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, kNpos, kNpos, 32, kNpos, kNpos, | |
1043 33 | |
1044 }; | |
1045 // Unescape to "http://www.google.com/foo.html#\x30B0\x30B0z". | |
1046 CheckAdjustedOffsets( | |
1047 "http://www.google.com/foo.html#\xE3\x82\xB0\xE3\x82\xB0z", "en", | |
1048 kFormatUrlOmitNothing, UnescapeRule::NORMAL, ref_offsets); | |
1049 | |
1050 const size_t omit_http_offsets[] = { | |
1051 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, | |
1052 10, 11, 12, 13, 14 | |
1053 }; | |
1054 CheckAdjustedOffsets("http://www.google.com/", "en", kFormatUrlOmitHTTP, | |
1055 UnescapeRule::NORMAL, omit_http_offsets); | |
1056 | |
1057 const size_t omit_http_start_with_ftp_offsets[] = { | |
1058 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21 | |
1059 }; | |
1060 CheckAdjustedOffsets("http://ftp.google.com/", "en", kFormatUrlOmitHTTP, | |
1061 UnescapeRule::NORMAL, omit_http_start_with_ftp_offsets); | |
1062 | |
1063 const size_t omit_all_offsets[] = { | |
1064 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, kNpos, kNpos, kNpos, kNpos, | |
1065 0, 1, 2, 3, 4, 5, 6, 7 | |
1066 }; | |
1067 CheckAdjustedOffsets("http://user@foo.com/", "en", kFormatUrlOmitAll, | |
1068 UnescapeRule::NORMAL, omit_all_offsets); | |
1069 } | |
1070 | |
1071 } // namespace net | |
OLD | NEW |