OLD | NEW |
1 // Copyright 2015 The Chromium Authors. All rights reserved. | 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "components/url_formatter/url_formatter.h" | 5 #include "components/url_formatter/url_formatter.h" |
6 | 6 |
7 #include <string.h> | 7 #include <string.h> |
8 | 8 |
9 #include <vector> | 9 #include <vector> |
10 | 10 |
11 #include "base/macros.h" | 11 #include "base/macros.h" |
12 #include "base/strings/string_number_conversions.h" | 12 #include "base/strings/string_number_conversions.h" |
13 #include "base/strings/stringprintf.h" | 13 #include "base/strings/stringprintf.h" |
14 #include "base/strings/utf_string_conversions.h" | 14 #include "base/strings/utf_string_conversions.h" |
15 #include "testing/gtest/include/gtest/gtest.h" | 15 #include "testing/gtest/include/gtest/gtest.h" |
16 #include "url/gurl.h" | 16 #include "url/gurl.h" |
17 | 17 |
18 | 18 |
19 namespace url_formatter { | 19 namespace url_formatter { |
20 | 20 |
21 namespace { | 21 namespace { |
22 | 22 |
23 using base::WideToUTF16; | 23 using base::WideToUTF16; |
24 using base::ASCIIToUTF16; | 24 using base::ASCIIToUTF16; |
25 | 25 |
26 const size_t kNpos = base::string16::npos; | 26 const size_t kNpos = base::string16::npos; |
27 | 27 |
28 const char* const kLanguages[] = { | |
29 "", "en", "zh-CN", "ja", "ko", | |
30 "he", "ar", "ru", "el", "fr", | |
31 "de", "pt", "sv", "th", "hi", | |
32 "de,en", "el,en", "zh-TW,en", "ko,ja", "he,ru,en", | |
33 "zh,ru,en" | |
34 }; | |
35 | |
36 struct IDNTestCase { | 28 struct IDNTestCase { |
37 const char* const input; | 29 const char* const input; |
38 const wchar_t* unicode_output; | 30 const wchar_t* unicode_output; |
39 const bool unicode_allowed[arraysize(kLanguages)]; | 31 const bool unicode_allowed; |
40 }; | 32 }; |
41 | 33 |
42 // TODO(jungshik) This is just a random sample of languages and is far | 34 // TODO(jungshik) This is just a random sample of languages and is far |
43 // from exhaustive. We may have to generate all the combinations | 35 // from exhaustive. We may have to generate all the combinations |
44 // of languages (powerset of a set of all the languages). | 36 // of languages (powerset of a set of all the languages). |
45 const IDNTestCase idn_cases[] = { | 37 const IDNTestCase idn_cases[] = { |
46 // No IDN | 38 // No IDN |
47 {"www.google.com", L"www.google.com", | 39 {"www.google.com", L"www.google.com", true}, |
48 {true, true, true, true, true, | 40 {"www.google.com.", L"www.google.com.", true}, |
49 true, true, true, true, true, | 41 {".", L".", true}, |
50 true, true, true, true, true, | 42 {"", L"", true}, |
51 true, true, true, true, true, | 43 // IDN |
52 true}}, | 44 // Hanzi (Traditional Chinese) |
53 {"www.google.com.", L"www.google.com.", | 45 {"xn--1lq90ic7f1rc.cn", L"\x5317\x4eac\x5927\x5b78.cn", true}, |
54 {true, true, true, true, true, | 46 // Hanzi ('video' in Simplified Chinese) |
55 true, true, true, true, true, | 47 {"xn--cy2a840a.com", L"\x89c6\x9891.com", true}, |
56 true, true, true, true, true, | 48 // Hanzi + '123' |
57 true, true, true, true, true, | 49 {"www.xn--123-p18d.com", |
58 true}}, | 50 L"www.\x4e00" |
59 {".", L".", | 51 L"123.com", |
60 {true, true, true, true, true, | 52 true}, |
61 true, true, true, true, true, | 53 // Hanzi + Latin : U+56FD is simplified |
62 true, true, true, true, true, | 54 {"www.xn--hello-9n1hm04c.com", L"www.hello\x4e2d\x56fd.com", true}, |
63 true, true, true, true, true, | 55 // Kanji + Kana (Japanese) |
64 true}}, | 56 {"xn--l8jvb1ey91xtjb.jp", L"\x671d\x65e5\x3042\x3055\x3072.jp", true}, |
65 {"", L"", | 57 // Katakana including U+30FC |
66 {true, true, true, true, true, | 58 {"xn--tckm4i2e.jp", L"\x30b3\x30de\x30fc\x30b9.jp", true}, |
67 true, true, true, true, true, | 59 {"xn--3ck7a7g.jp", L"\u30ce\u30f3\u30bd.jp", true}, |
68 true, true, true, true, true, | 60 // Katakana + Latin (Japanese) |
69 true, true, true, true, true, | 61 {"xn--e-efusa1mzf.jp", L"e\x30b3\x30de\x30fc\x30b9.jp", true}, |
70 true}}, | 62 {"xn--3bkxe.jp", L"\x30c8\x309a.jp", true}, |
71 // IDN | 63 // Hangul (Korean) |
72 // Hanzi (Traditional Chinese) | 64 {"www.xn--or3b17p6jjc.kr", L"www.\xc804\xc790\xc815\xbd80.kr", true}, |
73 {"xn--1lq90ic7f1rc.cn", L"\x5317\x4eac\x5927\x5b78.cn", | 65 // b<u-umlaut>cher (German) |
74 {true, false, true, true, false, | 66 {"xn--bcher-kva.de", |
75 false, false, false, false, false, | 67 L"b\x00fc" |
76 false, false, false, false, false, | 68 L"cher.de", |
77 false, false, true, true, false, | 69 true}, |
78 true}}, | 70 // a with diaeresis |
79 // Hanzi ('video' in Simplified Chinese : will pass only in zh-CN,zh) | 71 {"www.xn--frgbolaget-q5a.se", L"www.f\x00e4rgbolaget.se", true}, |
80 {"xn--cy2a840a.com", L"\x89c6\x9891.com", | 72 // c-cedilla (French) |
81 {true, false, true, false, false, | 73 {"www.xn--alliancefranaise-npb.fr", |
82 false, false, false, false, false, | 74 L"www.alliancefran\x00e7" |
83 false, false, false, false, false, | 75 L"aise.fr", |
84 false, false, false, false, false, | 76 true}, |
85 true}}, | 77 // caf'e with acute accent' (French) |
86 // Hanzi + '123' | 78 {"xn--caf-dma.fr", L"caf\x00e9.fr", true}, |
87 {"www.xn--123-p18d.com", L"www.\x4e00" L"123.com", | 79 // c-cedilla and a with tilde (Portuguese) |
88 {true, false, true, true, false, | 80 {"xn--poema-9qae5a.com.br", L"p\x00e3oema\x00e7\x00e3.com.br", true}, |
89 false, false, false, false, false, | 81 // s with caron |
90 false, false, false, false, false, | 82 {"xn--achy-f6a.com", |
91 false, false, true, true, false, | 83 L"\x0161" |
92 true}}, | 84 L"achy.com", |
93 // Hanzi + Latin : U+56FD is simplified and is regarded | 85 true}, |
94 // as not supported in zh-TW. | 86 {"xn--kxae4bafwg.gr", L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1.gr", |
95 {"www.xn--hello-9n1hm04c.com", L"www.hello\x4e2d\x56fd.com", | 87 true}, |
96 {false, false, true, true, false, | 88 // Eutopia + 123 (Greek) |
97 false, false, false, false, false, | 89 {"xn---123-pldm0haj2bk.gr", |
98 false, false, false, false, false, | 90 L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1-123.gr", true}, |
99 false, false, false, true, false, | 91 // Cyrillic (Russian) |
100 true}}, | 92 {"xn--n1aeec9b.ru", L"\x0442\x043e\x0440\x0442\x044b.ru", true}, |
101 // Kanji + Kana (Japanese) | 102 // Devanagari (Hindi) |
102 {"xn--l8jvb1ey91xtjb.jp", L"\x671d\x65e5\x3042\x3055\x3072.jp", | 94 {"xn---123-45dmmc5f.ru", L"\x0442\x043e\x0440\x0442\x044b-123.ru", true}, |
103 {true, false, false, true, false, | 95 // Arabic |
104 false, false, false, false, false, | 96 {"xn--mgba1fmg.eg", L"\x0627\x0641\x0644\x0627\x0645.eg", true}, |
105 false, false, false, false, false, | 97 // Hebrew |
106 false, false, false, true, false, | 98 {"xn--4dbib.he", L"\x05d5\x05d0\x05d4.he", true}, |
107 false}}, | 99 // Thai |
108 // Katakana including U+30FC | 100 {"xn--12c2cc4ag3b4ccu.th", |
109 {"xn--tckm4i2e.jp", L"\x30b3\x30de\x30fc\x30b9.jp", | 101 L"\x0e2a\x0e32\x0e22\x0e01\x0e32\x0e23\x0e1a\x0e34\x0e19.th", true}, |
110 {true, false, false, true, false, | 102 // Devangari (Hindi) |
111 false, false, false, false, false, | 103 {"www.xn--l1b6a9e1b7c.in", L"www.\x0905\x0915\x094b\x0932\x093e.in", true}, |
112 false, false, false, false, false, | 104 // Invalid IDN |
113 false, false, false, true, false, | 105 {"xn--hello?world.com", NULL, false}, |
114 }}, | 106 // Unsafe IDNs |
115 {"xn--3ck7a7g.jp", L"\u30ce\u30f3\u30bd.jp", | 107 // "payp<alpha>l.com" |
116 {true, false, false, true, false, | 108 {"www.xn--paypl-g9d.com", L"payp\x03b1l.com", false}, |
117 false, false, false, false, false, | 109 // google.gr with Greek omicron and epsilon |
118 false, false, false, false, false, | 110 {"xn--ggl-6xc1ca.gr", L"g\x03bf\x03bfgl\x03b5.gr", false}, |
119 false, false, false, true, false, | 111 // google.ru with Cyrillic o |
120 }}, | 112 {"xn--ggl-tdd6ba.ru", L"g\x043e\x043egl\x0435.ru", false}, |
121 // Katakana + Latin (Japanese) | 113 // h<e with acute>llo<China in Han>.cn |
122 // TODO(jungshik): Change 'false' in the first element to 'true' | 114 {"xn--hllo-bpa7979ih5m.cn", L"h\x00e9llo\x4e2d\x56fd.cn", false}, |
123 // after upgrading to ICU 4.2.1 to use new uspoof_* APIs instead | 115 // <Greek rho><Cyrillic a><Cyrillic u>.ru |
124 // of our IsIDNComponentInSingleScript(). | 116 {"xn--2xa6t2b.ru", L"\x03c1\x0430\x0443.ru", false}, |
125 {"xn--e-efusa1mzf.jp", L"e\x30b3\x30de\x30fc\x30b9.jp", | 117 // One that's really long that will force a buffer realloc |
126 {false, false, false, true, false, | 118 {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" |
127 false, false, false, false, false, | 119 "aaaaaaa", |
128 false, false, false, false, false, | 120 L"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" |
129 false, false, false, true, false, | 121 L"aaaaaaaa", |
130 }}, | 122 true}, |
131 {"xn--3bkxe.jp", L"\x30c8\x309a.jp", | 123 {"xn--osd3820f24c.kr", L"\xac00\xb098\x115f.kr", false}, |
132 {false, false, false, true, false, | 124 {"www.xn--google-ho0coa.com", L"www.\x2039google\x203a.com", false}, |
133 false, false, false, false, false, | 125 // Test cases for characters we blacklisted although allowed in IDN. |
134 false, false, false, false, false, | 126 {"google.xn--comabc-k8d", |
135 false, false, false, true, false, | 127 L"google.com\x0338" |
136 }}, | 128 L"abc", |
137 // Hangul (Korean) | 129 false}, |
138 {"www.xn--or3b17p6jjc.kr", L"www.\xc804\xc790\xc815\xbd80.kr", | 130 {"google.xn--com-oh4ba.evil.jp", L"google.com\x309a\x309a.evil.jp", false}, |
139 {true, false, false, false, true, | 131 {"google.xn--comevil-v04f.jp", |
140 false, false, false, false, false, | 132 L"google.com\x30ce" |
141 false, false, false, false, false, | 133 L"evil.jp", |
142 false, false, false, true, false, | 134 false}, |
143 false}}, | 135 // Padlock icon spoof. |
144 // b<u-umlaut>cher (German) | 136 {"xn--google-hj64e", L"\U0001f512google.com", false}, |
145 {"xn--bcher-kva.de", L"b\x00fc" L"cher.de", | 137 // Ensure that blacklisting "\xd83d\xdd12" did not inadvertently blacklist |
146 {true, false, false, false, false, | 138 // all strings with the surrogate '\xdd12'. |
147 false, false, false, false, true, | 139 {"xn--fk9c.com", L"\U00010912.com", false}, |
148 true, false, false, false, false, | 140 {"xn--g6h.com", L"\x2665.com", true}, |
149 true, false, false, false, false, | 141 {"xn--2ci.com", L"\x272a.com", true}, |
150 false}}, | |
151 // a with diaeresis | |
152 {"www.xn--frgbolaget-q5a.se", L"www.f\x00e4rgbolaget.se", | |
153 {true, false, false, false, false, | |
154 false, false, false, false, false, | |
155 true, false, true, false, false, | |
156 true, false, false, false, false, | |
157 false}}, | |
158 // c-cedilla (French) | |
159 {"www.xn--alliancefranaise-npb.fr", L"www.alliancefran\x00e7" L"aise.fr", | |
160 {true, false, false, false, false, | |
161 false, false, false, false, true, | |
162 false, true, false, false, false, | |
163 false, false, false, false, false, | |
164 false}}, | |
165 // caf'e with acute accent' (French) | |
166 {"xn--caf-dma.fr", L"caf\x00e9.fr", | |
167 {true, false, false, false, false, | |
168 false, false, false, false, true, | |
169 false, true, true, false, false, | |
170 false, false, false, false, false, | |
171 false}}, | |
172 // c-cedilla and a with tilde (Portuguese) | |
173 {"xn--poema-9qae5a.com.br", L"p\x00e3oema\x00e7\x00e3.com.br", | |
174 {true, false, false, false, false, | |
175 false, false, false, false, false, | |
176 false, true, false, false, false, | |
177 false, false, false, false, false, | |
178 false}}, | |
179 // s with caron | |
180 {"xn--achy-f6a.com", L"\x0161" L"achy.com", | |
181 {true, false, false, false, false, | |
182 false, false, false, false, false, | |
183 false, false, false, false, false, | |
184 false, false, false, false, false, | |
185 false}}, | |
186 // TODO(jungshik) : Add examples with Cyrillic letters | |
187 // only used in some languages written in Cyrillic. | |
188 // Eutopia (Greek) | |
189 {"xn--kxae4bafwg.gr", L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1.gr", | |
190 {true, false, false, false, false, | |
191 false, false, false, true, false, | |
192 false, false, false, false, false, | |
193 false, true, false, false, false, | |
194 false}}, | |
195 // Eutopia + 123 (Greek) | |
196 {"xn---123-pldm0haj2bk.gr", | |
197 L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1-123.gr", | |
198 {true, false, false, false, false, | |
199 false, false, false, true, false, | |
200 false, false, false, false, false, | |
201 false, true, false, false, false, | |
202 false}}, | |
203 // Cyrillic (Russian) | |
204 {"xn--n1aeec9b.ru", L"\x0442\x043e\x0440\x0442\x044b.ru", | |
205 {true, false, false, false, false, | |
206 false, false, true, false, false, | |
207 false, false, false, false, false, | |
208 false, false, false, false, true, | |
209 true}}, | |
210 // Cyrillic + 123 (Russian) | |
211 {"xn---123-45dmmc5f.ru", L"\x0442\x043e\x0440\x0442\x044b-123.ru", | |
212 {true, false, false, false, false, | |
213 false, false, true, false, false, | |
214 false, false, false, false, false, | |
215 false, false, false, false, true, | |
216 true}}, | |
217 // Arabic | |
218 {"xn--mgba1fmg.ar", L"\x0627\x0641\x0644\x0627\x0645.ar", | |
219 {true, false, false, false, false, | |
220 false, true, false, false, false, | |
221 false, false, false, false, false, | |
222 false, false, false, false, false, | |
223 false}}, | |
224 // Hebrew | |
225 {"xn--4dbib.he", L"\x05d5\x05d0\x05d4.he", | |
226 {true, false, false, false, false, | |
227 true, false, false, false, false, | |
228 false, false, false, false, false, | |
229 false, false, false, false, true, | |
230 false}}, | |
231 // Thai | |
232 {"xn--12c2cc4ag3b4ccu.th", | |
233 L"\x0e2a\x0e32\x0e22\x0e01\x0e32\x0e23\x0e1a\x0e34\x0e19.th", | |
234 {true, false, false, false, false, | |
235 false, false, false, false, false, | |
236 false, false, false, true, false, | |
237 false, false, false, false, false, | |
238 false}}, | |
239 // Devanagari (Hindi) | |
240 {"www.xn--l1b6a9e1b7c.in", L"www.\x0905\x0915\x094b\x0932\x093e.in", | |
241 {true, false, false, false, false, | |
242 false, false, false, false, false, | |
243 false, false, false, false, true, | |
244 false, false, false, false, false, | |
245 false}}, | |
246 // Invalid IDN | |
247 {"xn--hello?world.com", NULL, | |
248 {false, false, false, false, false, | |
249 false, false, false, false, false, | |
250 false, false, false, false, false, | |
251 false, false, false, false, false, | |
252 false}}, | |
253 // Unsafe IDNs | |
254 // "payp<alpha>l.com" | |
255 {"www.xn--paypl-g9d.com", L"payp\x03b1l.com", | |
256 {false, false, false, false, false, | |
257 false, false, false, false, false, | |
258 false, false, false, false, false, | |
259 false, false, false, false, false, | |
260 false}}, | |
261 // google.gr with Greek omicron and epsilon | |
262 {"xn--ggl-6xc1ca.gr", L"g\x03bf\x03bfgl\x03b5.gr", | |
263 {false, false, false, false, false, | |
264 false, false, false, false, false, | |
265 false, false, false, false, false, | |
266 false, false, false, false, false, | |
267 false}}, | |
268 // google.ru with Cyrillic o | |
269 {"xn--ggl-tdd6ba.ru", L"g\x043e\x043egl\x0435.ru", | |
270 {false, false, false, false, false, | |
271 false, false, false, false, false, | |
272 false, false, false, false, false, | |
273 false, false, false, false, false, | |
274 false}}, | |
275 // h<e with acute>llo<China in Han>.cn | |
276 {"xn--hllo-bpa7979ih5m.cn", L"h\x00e9llo\x4e2d\x56fd.cn", | |
277 {false, false, false, false, false, | |
278 false, false, false, false, false, | |
279 false, false, false, false, false, | |
280 false, false, false, false, false, | |
281 false}}, | |
282 // <Greek rho><Cyrillic a><Cyrillic u>.ru | |
283 {"xn--2xa6t2b.ru", L"\x03c1\x0430\x0443.ru", | |
284 {false, false, false, false, false, | |
285 false, false, false, false, false, | |
286 false, false, false, false, false, | |
287 false, false, false, false, false, | |
288 false}}, | |
289 // One that's really long that will force a buffer realloc | |
290 {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" | |
291 "aaaaaaa", | |
292 L"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" | |
293 L"aaaaaaaa", | |
294 {true, true, true, true, true, | |
295 true, true, true, true, true, | |
296 true, true, true, true, true, | |
297 true, true, true, true, true, | |
298 true}}, | |
299 // Test cases for characters we blacklisted although allowed in IDN. | |
300 // Embedded spaces will be turned to %20 in the display. | |
301 // TODO(jungshik): We need to have more cases. This is a typical | |
302 // data-driven trap. The following test cases need to be separated | |
303 // and tested only for a couple of languages. | |
304 {"xn--osd3820f24c.kr", L"\xac00\xb098\x115f.kr", | |
305 {false, false, false, false, false, | |
306 false, false, false, false, false, | |
307 false, false, false, false, false, | |
308 false, false, false, false, false, | |
309 false}}, | |
310 {"www.xn--google-ho0coa.com", L"www.\x2039google\x203a.com", | |
311 {false, false, false, false, false, | |
312 false, false, false, false, false, | |
313 false, false, false, false, false, | |
314 false, false, false, false, false, | |
315 }}, | |
316 {"google.xn--comabc-k8d", L"google.com\x0338" L"abc", | |
317 {false, false, false, false, false, | |
318 false, false, false, false, false, | |
319 false, false, false, false, false, | |
320 false, false, false, false, false, | |
321 }}, | |
322 {"google.xn--com-oh4ba.evil.jp", L"google.com\x309a\x309a.evil.jp", | |
323 {false, false, false, false, false, | |
324 false, false, false, false, false, | |
325 false, false, false, false, false, | |
326 false, false, false, false, false, | |
327 }}, | |
328 {"google.xn--comevil-v04f.jp", L"google.com\x30ce" L"evil.jp", | |
329 {false, false, false, false, false, | |
330 false, false, false, false, false, | |
331 false, false, false, false, false, | |
332 false, false, false, false, false, | |
333 }}, | |
334 // Padlock icon spoof. | |
335 {"xn--google-hj64e", L"\U0001f512google.com", | |
336 {false, false, false, false, false, | |
337 false, false, false, false, false, | |
338 false, false, false, false, false, | |
339 false, false, false, false, false, | |
340 }}, | |
341 // Ensure that blacklisting "\xd83d\xdd12" did not inadvertently blacklist | |
342 // all strings with the surrogate '\xdd12'. | |
343 {"xn--fk9c.com", L"\U00010912.com", | |
344 {true, false, false, false, false, | |
345 false, false, false, false, false, | |
346 false, false, false, false, false, | |
347 false, false, false, false, false, | |
348 }}, | |
349 #if 0 | 142 #if 0 |
350 // These two cases are special. We need a separate test. | 143 // These two cases are special. We need a separate test. |
351 // U+3000 and U+3002 are normalized to ASCII space and dot. | 144 // U+3000 and U+3002 are normalized to ASCII space and dot. |
352 {"xn-- -kq6ay5z.cn", L"\x4e2d\x56fd\x3000.cn", | 145 {"xn-- -kq6ay5z.cn", L"\x4e2d\x56fd\x3000.cn", |
353 {false, false, true, false, false, | 146 {false, false, true, false, false, |
354 false, false, false, false, false, | 147 false, false, false, false, false, |
355 false, false, false, false, false, | 148 false, false, false, false, false, |
356 false, false, true, false, false, | 149 false, false, true, false, false, |
357 true}}, | 150 true}}, |
358 {"xn--fiqs8s.cn", L"\x4e2d\x56fd\x3002" L"cn", | 151 {"xn--fiqs8s.cn", L"\x4e2d\x56fd\x3002" L"cn", |
359 {false, false, true, false, false, | 152 {false, false, true, false, false, |
360 false, false, false, false, false, | 153 false, false, false, false, false, |
361 false, false, false, false, false, | 154 false, false, false, false, false, |
362 false, false, true, false, false, | 155 false, false, true, false, false, |
363 true}}, | 156 true}}, |
364 #endif | 157 #endif |
365 }; | 158 }; |
366 | 159 |
367 struct AdjustOffsetCase { | 160 struct AdjustOffsetCase { |
368 size_t input_offset; | 161 size_t input_offset; |
369 size_t output_offset; | 162 size_t output_offset; |
370 }; | 163 }; |
371 | 164 |
372 struct UrlTestData { | 165 struct UrlTestData { |
373 const char* const description; | 166 const char* const description; |
374 const char* const input; | 167 const char* const input; |
375 const char* const languages; | |
376 FormatUrlTypes format_types; | 168 FormatUrlTypes format_types; |
377 net::UnescapeRule::Type escape_rules; | 169 net::UnescapeRule::Type escape_rules; |
378 const wchar_t* output; // Use |wchar_t| to handle Unicode constants easily. | 170 const wchar_t* output; // Use |wchar_t| to handle Unicode constants easily. |
379 size_t prefix_len; | 171 size_t prefix_len; |
380 }; | 172 }; |
381 | 173 |
382 // A helper for IDN*{Fast,Slow}. | |
383 // Append "::<language list>" to |expected| and |actual| to make it | |
384 // easy to tell which sub-case fails without debugging. | |
385 void AppendLanguagesToOutputs(const char* languages, | |
386 base::string16* expected, | |
387 base::string16* actual) { | |
388 base::string16 to_append = ASCIIToUTF16("::") + ASCIIToUTF16(languages); | |
389 expected->append(to_append); | |
390 actual->append(to_append); | |
391 } | |
392 | |
393 // A pair of helpers for the FormatUrlWithOffsets() test. | 174 // A pair of helpers for the FormatUrlWithOffsets() test. |
394 void VerboseExpect(size_t expected, | 175 void VerboseExpect(size_t expected, |
395 size_t actual, | 176 size_t actual, |
396 const std::string& original_url, | 177 const std::string& original_url, |
397 size_t position, | 178 size_t position, |
398 const base::string16& formatted_url) { | 179 const base::string16& formatted_url) { |
399 EXPECT_EQ(expected, actual) << "Original URL: " << original_url | 180 EXPECT_EQ(expected, actual) << "Original URL: " << original_url |
400 << " (at char " << position << ")\nFormatted URL: " << formatted_url; | 181 << " (at char " << position << ")\nFormatted URL: " << formatted_url; |
401 } | 182 } |
402 | 183 |
403 void CheckAdjustedOffsets(const std::string& url_string, | 184 void CheckAdjustedOffsets(const std::string& url_string, |
404 const std::string& languages, | |
405 FormatUrlTypes format_types, | 185 FormatUrlTypes format_types, |
406 net::UnescapeRule::Type unescape_rules, | 186 net::UnescapeRule::Type unescape_rules, |
407 const size_t* output_offsets) { | 187 const size_t* output_offsets) { |
408 GURL url(url_string); | 188 GURL url(url_string); |
409 size_t url_length = url_string.length(); | 189 size_t url_length = url_string.length(); |
410 std::vector<size_t> offsets; | 190 std::vector<size_t> offsets; |
411 for (size_t i = 0; i <= url_length + 1; ++i) | 191 for (size_t i = 0; i <= url_length + 1; ++i) |
412 offsets.push_back(i); | 192 offsets.push_back(i); |
413 offsets.push_back(500000); // Something larger than any input length. | 193 offsets.push_back(500000); // Something larger than any input length. |
414 offsets.push_back(std::string::npos); | 194 offsets.push_back(std::string::npos); |
415 base::string16 formatted_url = FormatUrlWithOffsets(url, languages, | 195 base::string16 formatted_url = FormatUrlWithOffsets(url, std::string(), |
416 format_types, unescape_rules, NULL, NULL, &offsets); | 196 format_types, unescape_rules, NULL, NULL, &offsets); |
417 for (size_t i = 0; i < url_length; ++i) | 197 for (size_t i = 0; i < url_length; ++i) |
418 VerboseExpect(output_offsets[i], offsets[i], url_string, i, formatted_url); | 198 VerboseExpect(output_offsets[i], offsets[i], url_string, i, formatted_url); |
419 VerboseExpect(formatted_url.length(), offsets[url_length], url_string, | 199 VerboseExpect(formatted_url.length(), offsets[url_length], url_string, |
420 url_length, formatted_url); | 200 url_length, formatted_url); |
421 VerboseExpect(base::string16::npos, offsets[url_length + 1], url_string, | 201 VerboseExpect(base::string16::npos, offsets[url_length + 1], url_string, |
422 500000, formatted_url); | 202 500000, formatted_url); |
423 VerboseExpect(base::string16::npos, offsets[url_length + 2], url_string, | 203 VerboseExpect(base::string16::npos, offsets[url_length + 2], url_string, |
424 std::string::npos, formatted_url); | 204 std::string::npos, formatted_url); |
425 } | 205 } |
426 | 206 |
427 TEST(UrlFormatterTest, IDNToUnicodeFast) { | 207 TEST(UrlFormatterTest, IDNToUnicode) { |
428 for (size_t i = 0; i < arraysize(idn_cases); i++) { | 208 for (size_t i = 0; i < arraysize(idn_cases); i++) { |
429 for (size_t j = 0; j < arraysize(kLanguages); j++) { | 209 base::string16 output(IDNToUnicode(idn_cases[i].input, std::string())); |
430 // ja || zh-TW,en || ko,ja -> IDNToUnicodeSlow | 210 base::string16 expected(idn_cases[i].unicode_allowed |
431 if (j == 3 || j == 17 || j == 18) | 211 ? WideToUTF16(idn_cases[i].unicode_output) |
432 continue; | 212 : ASCIIToUTF16(idn_cases[i].input)); |
433 base::string16 output(IDNToUnicode(idn_cases[i].input, kLanguages[j])); | 213 EXPECT_EQ(expected, output) << "input # " << i << ": \"" |
434 base::string16 expected(idn_cases[i].unicode_allowed[j] ? | 214 << idn_cases[i].input << "\""; |
435 WideToUTF16(idn_cases[i].unicode_output) : | |
436 ASCIIToUTF16(idn_cases[i].input)); | |
437 AppendLanguagesToOutputs(kLanguages[j], &expected, &output); | |
438 EXPECT_EQ(expected, output) << "input: \"" << idn_cases[i].input | |
439 << "\", languages: \"" << kLanguages[j] | |
440 << "\""; | |
441 } | |
442 } | |
443 } | |
444 | |
445 TEST(UrlFormatterTest, IDNToUnicodeSlow) { | |
446 for (size_t i = 0; i < arraysize(idn_cases); i++) { | |
447 for (size_t j = 0; j < arraysize(kLanguages); j++) { | |
448 // !(ja || zh-TW,en || ko,ja) -> IDNToUnicodeFast | |
449 if (!(j == 3 || j == 17 || j == 18)) | |
450 continue; | |
451 base::string16 output(IDNToUnicode(idn_cases[i].input, kLanguages[j])); | |
452 base::string16 expected(idn_cases[i].unicode_allowed[j] ? | |
453 WideToUTF16(idn_cases[i].unicode_output) : | |
454 ASCIIToUTF16(idn_cases[i].input)); | |
455 AppendLanguagesToOutputs(kLanguages[j], &expected, &output); | |
456 EXPECT_EQ(expected, output) << "input: \"" << idn_cases[i].input | |
457 << "\", languages: \"" << kLanguages[j] | |
458 << "\""; | |
459 } | |
460 } | |
461 } | |
462 | |
463 // ulocdata_getExemplarSet may fail with some locales (currently bn, gu, and | |
464 // te), which was causing a crash (See http://crbug.com/510551). This may be an | |
465 // icu bug, but regardless, that should not cause a crash. | |
466 TEST(UrlFormatterTest, IDNToUnicodeNeverCrashes) { | |
467 for (char c1 = 'a'; c1 <= 'z'; c1++) { | |
468 for (char c2 = 'a'; c2 <= 'z'; c2++) { | |
469 std::string lang = base::StringPrintf("%c%c", c1, c2); | |
470 base::string16 output(IDNToUnicode("xn--74h", lang)); | |
471 } | |
472 } | 215 } |
473 } | 216 } |
474 | 217 |
475 TEST(UrlFormatterTest, FormatUrl) { | 218 TEST(UrlFormatterTest, FormatUrl) { |
476 FormatUrlTypes default_format_type = kFormatUrlOmitUsernamePassword; | 219 FormatUrlTypes default_format_type = kFormatUrlOmitUsernamePassword; |
477 const UrlTestData tests[] = { | 220 const UrlTestData tests[] = { |
478 {"Empty URL", "", "", default_format_type, net::UnescapeRule::NORMAL, L"", | 221 {"Empty URL", "", default_format_type, net::UnescapeRule::NORMAL, L"", |
479 0}, | 222 0}, |
480 | 223 |
481 {"Simple URL", "http://www.google.com/", "", default_format_type, | 224 {"Simple URL", "http://www.google.com/", default_format_type, |
482 net::UnescapeRule::NORMAL, L"http://www.google.com/", 7}, | 225 net::UnescapeRule::NORMAL, L"http://www.google.com/", 7}, |
483 | 226 |
484 {"With a port number and a reference", | 227 {"With a port number and a reference", |
485 "http://www.google.com:8080/#\xE3\x82\xB0", "", default_format_type, | 228 "http://www.google.com:8080/#\xE3\x82\xB0", default_format_type, |
486 net::UnescapeRule::NORMAL, L"http://www.google.com:8080/#\x30B0", 7}, | 229 net::UnescapeRule::NORMAL, L"http://www.google.com:8080/#\x30B0", 7}, |
487 | 230 |
488 // -------- IDN tests -------- | 231 // -------- IDN tests -------- |
489 {"Japanese IDN with ja", "http://xn--l8jvb1ey91xtjb.jp", "ja", | 232 {"Japanese IDN with ja", "http://xn--l8jvb1ey91xtjb.jp", |
490 default_format_type, net::UnescapeRule::NORMAL, | 233 default_format_type, net::UnescapeRule::NORMAL, |
491 L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7}, | 234 L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7}, |
492 | 235 |
493 {"Japanese IDN with en", "http://xn--l8jvb1ey91xtjb.jp", "en", | 236 {"mailto: with Japanese IDN", "mailto:foo@xn--l8jvb1ey91xtjb.jp", |
494 default_format_type, net::UnescapeRule::NORMAL, | |
495 L"http://xn--l8jvb1ey91xtjb.jp/", 7}, | |
496 | |
497 {"Japanese IDN without any languages", "http://xn--l8jvb1ey91xtjb.jp", "", | |
498 default_format_type, net::UnescapeRule::NORMAL, | |
499 // Single script is safe for empty languages. | |
500 L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7}, | |
501 | |
502 {"mailto: with Japanese IDN", "mailto:foo@xn--l8jvb1ey91xtjb.jp", "ja", | |
503 default_format_type, net::UnescapeRule::NORMAL, | 237 default_format_type, net::UnescapeRule::NORMAL, |
504 // GURL doesn't assume an email address's domain part as a host name. | 238 // GURL doesn't assume an email address's domain part as a host name. |
505 L"mailto:foo@xn--l8jvb1ey91xtjb.jp", 7}, | 239 L"mailto:foo@xn--l8jvb1ey91xtjb.jp", 7}, |
506 | 240 |
507 {"file: with Japanese IDN", "file://xn--l8jvb1ey91xtjb.jp/config.sys", | 241 {"file: with Japanese IDN", "file://xn--l8jvb1ey91xtjb.jp/config.sys", |
508 "ja", default_format_type, net::UnescapeRule::NORMAL, | 242 default_format_type, net::UnescapeRule::NORMAL, |
509 L"file://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 7}, | 243 L"file://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 7}, |
510 | 244 |
511 {"ftp: with Japanese IDN", "ftp://xn--l8jvb1ey91xtjb.jp/config.sys", "ja", | 245 {"ftp: with Japanese IDN", "ftp://xn--l8jvb1ey91xtjb.jp/config.sys", |
512 default_format_type, net::UnescapeRule::NORMAL, | 246 default_format_type, net::UnescapeRule::NORMAL, |
513 L"ftp://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 6}, | 247 L"ftp://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 6}, |
514 | 248 |
515 // -------- omit_username_password flag tests -------- | 249 // -------- omit_username_password flag tests -------- |
516 {"With username and password, omit_username_password=false", | 250 {"With username and password, omit_username_password=false", |
517 "http://user:passwd@example.com/foo", "", kFormatUrlOmitNothing, | 251 "http://user:passwd@example.com/foo", kFormatUrlOmitNothing, |
518 net::UnescapeRule::NORMAL, L"http://user:passwd@example.com/foo", 19}, | 252 net::UnescapeRule::NORMAL, L"http://user:passwd@example.com/foo", 19}, |
519 | 253 |
520 {"With username and password, omit_username_password=true", | 254 {"With username and password, omit_username_password=true", |
521 "http://user:passwd@example.com/foo", "", default_format_type, | 255 "http://user:passwd@example.com/foo", default_format_type, |
522 net::UnescapeRule::NORMAL, L"http://example.com/foo", 7}, | 256 net::UnescapeRule::NORMAL, L"http://example.com/foo", 7}, |
523 | 257 |
524 {"With username and no password", "http://user@example.com/foo", "", | 258 {"With username and no password", "http://user@example.com/foo", |
525 default_format_type, net::UnescapeRule::NORMAL, | 259 default_format_type, net::UnescapeRule::NORMAL, |
526 L"http://example.com/foo", 7}, | 260 L"http://example.com/foo", 7}, |
527 | 261 |
528 {"Just '@' without username and password", "http://@example.com/foo", "", | 262 {"Just '@' without username and password", "http://@example.com/foo", |
529 default_format_type, net::UnescapeRule::NORMAL, | 263 default_format_type, net::UnescapeRule::NORMAL, |
530 L"http://example.com/foo", 7}, | 264 L"http://example.com/foo", 7}, |
531 | 265 |
532 // GURL doesn't think local-part of an email address is username for URL. | 266 // GURL doesn't think local-part of an email address is username for URL. |
533 {"mailto:, omit_username_password=true", "mailto:foo@example.com", "", | 267 {"mailto:, omit_username_password=true", "mailto:foo@example.com", |
534 default_format_type, net::UnescapeRule::NORMAL, | 268 default_format_type, net::UnescapeRule::NORMAL, |
535 L"mailto:foo@example.com", 7}, | 269 L"mailto:foo@example.com", 7}, |
536 | 270 |
537 // -------- unescape flag tests -------- | 271 // -------- unescape flag tests -------- |
538 {"Do not unescape", | 272 {"Do not unescape", |
539 "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/" | 273 "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/" |
540 "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB" | 274 "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB" |
541 "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", | 275 "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", |
542 "en", default_format_type, net::UnescapeRule::NONE, | 276 default_format_type, net::UnescapeRule::NONE, |
543 // GURL parses %-encoded hostnames into Punycode. | 277 // GURL parses %-encoded hostnames into Punycode. |
544 L"http://xn--qcka1pmc.jp/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB" | 278 L"http://\x30B0\x30FC\x30B0\x30EB.jp/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB
" |
545 L"?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", | 279 L"?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", |
546 7}, | 280 7}, |
547 | 281 |
548 {"Unescape normally", | 282 {"Unescape normally", |
549 "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/" | 283 "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/" |
550 "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB" | 284 "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB" |
551 "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", | 285 "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", |
552 "en", default_format_type, net::UnescapeRule::NORMAL, | 286 default_format_type, net::UnescapeRule::NORMAL, |
553 L"http://xn--qcka1pmc.jp/\x30B0\x30FC\x30B0\x30EB" | 287 L"http://\x30B0\x30FC\x30B0\x30EB.jp/\x30B0\x30FC\x30B0\x30EB" |
554 L"?q=\x30B0\x30FC\x30B0\x30EB", | 288 L"?q=\x30B0\x30FC\x30B0\x30EB", |
555 7}, | 289 7}, |
556 | 290 |
557 {"Unescape normally with BiDi control character", | 291 {"Unescape normally with BiDi control character", |
558 "http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy", "en", | 292 "http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy", |
559 default_format_type, net::UnescapeRule::NORMAL, | 293 default_format_type, net::UnescapeRule::NORMAL, |
560 L"http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy", 7}, | 294 L"http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy", 7}, |
561 | 295 |
562 {"Unescape normally including unescape spaces", | 296 {"Unescape normally including unescape spaces", |
563 "http://www.google.com/search?q=Hello%20World", "en", | 297 "http://www.google.com/search?q=Hello%20World", |
564 default_format_type, net::UnescapeRule::SPACES, | 298 default_format_type, net::UnescapeRule::SPACES, |
565 L"http://www.google.com/search?q=Hello World", 7}, | 299 L"http://www.google.com/search?q=Hello World", 7}, |
566 | 300 |
567 /* | 301 /* |
568 {"unescape=true with some special characters", | 302 {"unescape=true with some special characters", |
569 "http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", "", | 303 "http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", |
570 kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, | 304 kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, |
571 L"http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", 25}, | 305 L"http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", 25}, |
572 */ | 306 */ |
573 // Disabled: the resultant URL becomes "...user%253A:%2540passwd...". | 307 // Disabled: the resultant URL becomes "...user%253A:%2540passwd...". |
574 | 308 |
575 // -------- omit http: -------- | 309 // -------- omit http: -------- |
576 {"omit http with user name", "http://user@example.com/foo", "", | 310 {"omit http with user name", "http://user@example.com/foo", |
577 kFormatUrlOmitAll, net::UnescapeRule::NORMAL, L"example.com/foo", 0}, | 311 kFormatUrlOmitAll, net::UnescapeRule::NORMAL, L"example.com/foo", 0}, |
578 | 312 |
579 {"omit http", "http://www.google.com/", "en", kFormatUrlOmitHTTP, | 313 {"omit http", "http://www.google.com/", kFormatUrlOmitHTTP, |
580 net::UnescapeRule::NORMAL, L"www.google.com/", 0}, | 314 net::UnescapeRule::NORMAL, L"www.google.com/", 0}, |
581 | 315 |
582 {"omit http with https", "https://www.google.com/", "en", | 316 {"omit http with https", "https://www.google.com/", |
583 kFormatUrlOmitHTTP, net::UnescapeRule::NORMAL, | 317 kFormatUrlOmitHTTP, net::UnescapeRule::NORMAL, |
584 L"https://www.google.com/", 8}, | 318 L"https://www.google.com/", 8}, |
585 | 319 |
586 {"omit http starts with ftp.", "http://ftp.google.com/", "en", | 320 {"omit http starts with ftp.", "http://ftp.google.com/", |
587 kFormatUrlOmitHTTP, net::UnescapeRule::NORMAL, L"http://ftp.google.com/", | 321 kFormatUrlOmitHTTP, net::UnescapeRule::NORMAL, L"http://ftp.google.com/", |
588 7}, | 322 7}, |
589 | 323 |
590 // -------- omit trailing slash on bare hostname -------- | 324 // -------- omit trailing slash on bare hostname -------- |
591 {"omit slash when it's the entire path", "http://www.google.com/", "en", | 325 {"omit slash when it's the entire path", "http://www.google.com/", |
592 kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL, | 326 kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL, |
593 L"http://www.google.com", 7}, | 327 L"http://www.google.com", 7}, |
594 {"omit slash when there's a ref", "http://www.google.com/#ref", "en", | 328 {"omit slash when there's a ref", "http://www.google.com/#ref", |
595 kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL, | 329 kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL, |
596 L"http://www.google.com/#ref", 7}, | 330 L"http://www.google.com/#ref", 7}, |
597 {"omit slash when there's a query", "http://www.google.com/?", "en", | 331 {"omit slash when there's a query", "http://www.google.com/?", |
598 kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL, | 332 kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL, |
599 L"http://www.google.com/?", 7}, | 333 L"http://www.google.com/?", 7}, |
600 {"omit slash when it's not the entire path", "http://www.google.com/foo", | 334 {"omit slash when it's not the entire path", "http://www.google.com/foo", |
601 "en", kFormatUrlOmitTrailingSlashOnBareHostname, | 335 kFormatUrlOmitTrailingSlashOnBareHostname, |
602 net::UnescapeRule::NORMAL, L"http://www.google.com/foo", 7}, | 336 net::UnescapeRule::NORMAL, L"http://www.google.com/foo", 7}, |
603 {"omit slash for nonstandard URLs", "data:/", "en", | 337 {"omit slash for nonstandard URLs", "data:/", |
604 kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL, | 338 kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL, |
605 L"data:/", 5}, | 339 L"data:/", 5}, |
606 {"omit slash for file URLs", "file:///", "en", | 340 {"omit slash for file URLs", "file:///", |
607 kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL, | 341 kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL, |
608 L"file:///", 7}, | 342 L"file:///", 7}, |
609 | 343 |
610 // -------- view-source: -------- | 344 // -------- view-source: -------- |
611 {"view-source", "view-source:http://xn--qcka1pmc.jp/", "ja", | 345 {"view-source", "view-source:http://xn--qcka1pmc.jp/", |
612 default_format_type, net::UnescapeRule::NORMAL, | 346 default_format_type, net::UnescapeRule::NORMAL, |
613 L"view-source:http://\x30B0\x30FC\x30B0\x30EB.jp/", 19}, | 347 L"view-source:http://\x30B0\x30FC\x30B0\x30EB.jp/", 19}, |
614 | 348 |
615 {"view-source of view-source", | 349 {"view-source of view-source", |
616 "view-source:view-source:http://xn--qcka1pmc.jp/", "ja", | 350 "view-source:view-source:http://xn--qcka1pmc.jp/", |
617 default_format_type, net::UnescapeRule::NORMAL, | 351 default_format_type, net::UnescapeRule::NORMAL, |
618 L"view-source:view-source:http://xn--qcka1pmc.jp/", 12}, | 352 L"view-source:view-source:http://xn--qcka1pmc.jp/", 12}, |
619 | 353 |
620 // view-source should omit http and trailing slash where non-view-source | 354 // view-source should omit http and trailing slash where non-view-source |
621 // would. | 355 // would. |
622 {"view-source omit http", "view-source:http://a.b/c", "en", | 356 {"view-source omit http", "view-source:http://a.b/c", |
623 kFormatUrlOmitAll, net::UnescapeRule::NORMAL, L"view-source:a.b/c", 12}, | 357 kFormatUrlOmitAll, net::UnescapeRule::NORMAL, L"view-source:a.b/c", 12}, |
624 {"view-source omit http starts with ftp.", "view-source:http://ftp.b/c", | 358 {"view-source omit http starts with ftp.", "view-source:http://ftp.b/c", |
625 "en", kFormatUrlOmitAll, net::UnescapeRule::NORMAL, | 359 kFormatUrlOmitAll, net::UnescapeRule::NORMAL, |
626 L"view-source:http://ftp.b/c", 19}, | 360 L"view-source:http://ftp.b/c", 19}, |
627 {"view-source omit slash when it's the entire path", | 361 {"view-source omit slash when it's the entire path", |
628 "view-source:http://a.b/", "en", kFormatUrlOmitAll, | 362 "view-source:http://a.b/", kFormatUrlOmitAll, |
629 net::UnescapeRule::NORMAL, L"view-source:a.b", 12}, | 363 net::UnescapeRule::NORMAL, L"view-source:a.b", 12}, |
630 }; | 364 }; |
631 | 365 |
632 for (size_t i = 0; i < arraysize(tests); ++i) { | 366 for (size_t i = 0; i < arraysize(tests); ++i) { |
633 size_t prefix_len; | 367 size_t prefix_len; |
634 base::string16 formatted = FormatUrl( | 368 base::string16 formatted = FormatUrl( |
635 GURL(tests[i].input), tests[i].languages, tests[i].format_types, | 369 GURL(tests[i].input), std::string(), tests[i].format_types, |
636 tests[i].escape_rules, NULL, &prefix_len, NULL); | 370 tests[i].escape_rules, NULL, &prefix_len, NULL); |
637 EXPECT_EQ(WideToUTF16(tests[i].output), formatted) << tests[i].description; | 371 EXPECT_EQ(WideToUTF16(tests[i].output), formatted) << tests[i].description; |
638 EXPECT_EQ(tests[i].prefix_len, prefix_len) << tests[i].description; | 372 EXPECT_EQ(tests[i].prefix_len, prefix_len) << tests[i].description; |
639 } | 373 } |
640 } | 374 } |
641 | 375 |
642 TEST(UrlFormatterTest, FormatUrlParsed) { | 376 TEST(UrlFormatterTest, FormatUrlParsed) { |
643 // No unescape case. | 377 // No unescape case. |
644 url::Parsed parsed; | 378 url::Parsed parsed; |
645 base::string16 formatted = | 379 base::string16 formatted = |
646 FormatUrl(GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/" | 380 FormatUrl(GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/" |
647 "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"), | 381 "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"), |
648 "ja", kFormatUrlOmitNothing, net::UnescapeRule::NONE, &parsed, | 382 std::string(), kFormatUrlOmitNothing, net::UnescapeRule::NONE, |
649 NULL, NULL); | 383 &parsed, NULL, NULL); |
650 EXPECT_EQ(WideToUTF16( | 384 EXPECT_EQ(WideToUTF16( |
651 L"http://%E3%82%B0:%E3%83%BC@\x30B0\x30FC\x30B0\x30EB.jp:8080" | 385 L"http://%E3%82%B0:%E3%83%BC@\x30B0\x30FC\x30B0\x30EB.jp:8080" |
652 L"/%E3%82%B0/?q=%E3%82%B0#\x30B0"), formatted); | 386 L"/%E3%82%B0/?q=%E3%82%B0#\x30B0"), formatted); |
653 EXPECT_EQ(WideToUTF16(L"%E3%82%B0"), | 387 EXPECT_EQ(WideToUTF16(L"%E3%82%B0"), |
654 formatted.substr(parsed.username.begin, parsed.username.len)); | 388 formatted.substr(parsed.username.begin, parsed.username.len)); |
655 EXPECT_EQ(WideToUTF16(L"%E3%83%BC"), | 389 EXPECT_EQ(WideToUTF16(L"%E3%83%BC"), |
656 formatted.substr(parsed.password.begin, parsed.password.len)); | 390 formatted.substr(parsed.password.begin, parsed.password.len)); |
657 EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"), | 391 EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"), |
658 formatted.substr(parsed.host.begin, parsed.host.len)); | 392 formatted.substr(parsed.host.begin, parsed.host.len)); |
659 EXPECT_EQ(WideToUTF16(L"8080"), | 393 EXPECT_EQ(WideToUTF16(L"8080"), |
(...skipping 194 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
854 if (test_char && | 588 if (test_char && |
855 strchr(kUnescapedCharacters, static_cast<char>(test_char))) { | 589 strchr(kUnescapedCharacters, static_cast<char>(test_char))) { |
856 EXPECT_NE(url.spec(), GURL(formatted).spec()); | 590 EXPECT_NE(url.spec(), GURL(formatted).spec()); |
857 } else { | 591 } else { |
858 EXPECT_EQ(url.spec(), GURL(formatted).spec()); | 592 EXPECT_EQ(url.spec(), GURL(formatted).spec()); |
859 } | 593 } |
860 } | 594 } |
861 } | 595 } |
862 | 596 |
863 TEST(UrlFormatterTest, FormatUrlWithOffsets) { | 597 TEST(UrlFormatterTest, FormatUrlWithOffsets) { |
864 CheckAdjustedOffsets(std::string(), "en", kFormatUrlOmitNothing, | 598 CheckAdjustedOffsets(std::string(), kFormatUrlOmitNothing, |
865 net::UnescapeRule::NORMAL, NULL); | 599 net::UnescapeRule::NORMAL, NULL); |
866 | 600 |
867 const size_t basic_offsets[] = { | 601 const size_t basic_offsets[] = { |
868 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, | 602 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, |
869 21, 22, 23, 24, 25 | 603 21, 22, 23, 24, 25 |
870 }; | 604 }; |
871 CheckAdjustedOffsets("http://www.google.com/foo/", "en", | 605 CheckAdjustedOffsets("http://www.google.com/foo/", |
872 kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, | 606 kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, |
873 basic_offsets); | 607 basic_offsets); |
874 | 608 |
875 const size_t omit_auth_offsets_1[] = { | 609 const size_t omit_auth_offsets_1[] = { |
876 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 7, | 610 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 7, |
877 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21 | 611 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21 |
878 }; | 612 }; |
879 CheckAdjustedOffsets("http://foo:bar@www.google.com/", "en", | 613 CheckAdjustedOffsets("http://foo:bar@www.google.com/", |
880 kFormatUrlOmitUsernamePassword, | 614 kFormatUrlOmitUsernamePassword, |
881 net::UnescapeRule::NORMAL, omit_auth_offsets_1); | 615 net::UnescapeRule::NORMAL, omit_auth_offsets_1); |
882 | 616 |
883 const size_t omit_auth_offsets_2[] = { | 617 const size_t omit_auth_offsets_2[] = { |
884 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, 7, 8, 9, 10, 11, 12, 13, 14, | 618 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, 7, 8, 9, 10, 11, 12, 13, 14, |
885 15, 16, 17, 18, 19, 20, 21 | 619 15, 16, 17, 18, 19, 20, 21 |
886 }; | 620 }; |
887 CheckAdjustedOffsets("http://foo@www.google.com/", "en", | 621 CheckAdjustedOffsets("http://foo@www.google.com/", |
888 kFormatUrlOmitUsernamePassword, | 622 kFormatUrlOmitUsernamePassword, |
889 net::UnescapeRule::NORMAL, omit_auth_offsets_2); | 623 net::UnescapeRule::NORMAL, omit_auth_offsets_2); |
890 | 624 |
891 const size_t dont_omit_auth_offsets[] = { | 625 const size_t dont_omit_auth_offsets[] = { |
892 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, | 626 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, |
893 kNpos, kNpos, 11, 12, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, | 627 kNpos, kNpos, 11, 12, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, |
894 kNpos, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, | 628 kNpos, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, |
895 30, 31 | 629 30, 31 |
896 }; | 630 }; |
897 // Unescape to "http://foo\x30B0:\x30B0bar@www.google.com". | 631 // Unescape to "http://foo\x30B0:\x30B0bar@www.google.com". |
898 CheckAdjustedOffsets("http://foo%E3%82%B0:%E3%82%B0bar@www.google.com/", "en", | 632 CheckAdjustedOffsets("http://foo%E3%82%B0:%E3%82%B0bar@www.google.com/", |
899 kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, | 633 kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, |
900 dont_omit_auth_offsets); | 634 dont_omit_auth_offsets); |
901 | 635 |
902 const size_t view_source_offsets[] = { | 636 const size_t view_source_offsets[] = { |
903 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, kNpos, | 637 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, kNpos, |
904 kNpos, kNpos, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33 | 638 kNpos, kNpos, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33 |
905 }; | 639 }; |
906 CheckAdjustedOffsets("view-source:http://foo@www.google.com/", "en", | 640 CheckAdjustedOffsets("view-source:http://foo@www.google.com/", |
907 kFormatUrlOmitUsernamePassword, | 641 kFormatUrlOmitUsernamePassword, |
908 net::UnescapeRule::NORMAL, view_source_offsets); | 642 net::UnescapeRule::NORMAL, view_source_offsets); |
909 | 643 |
910 const size_t idn_hostname_offsets_1[] = { | 644 const size_t idn_hostname_offsets_1[] = { |
911 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, | 645 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, |
912 kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 12, | 646 kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 12, |
913 13, 14, 15, 16, 17, 18, 19 | 647 13, 14, 15, 16, 17, 18, 19 |
914 }; | 648 }; |
915 // Convert punycode to "http://\x671d\x65e5\x3042\x3055\x3072.jp/foo/". | 649 // Convert punycode to "http://\x671d\x65e5\x3042\x3055\x3072.jp/foo/". |
916 CheckAdjustedOffsets("http://xn--l8jvb1ey91xtjb.jp/foo/", "ja", | 650 CheckAdjustedOffsets("http://xn--l8jvb1ey91xtjb.jp/foo/", |
917 kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, | 651 kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, |
918 idn_hostname_offsets_1); | 652 idn_hostname_offsets_1); |
919 | 653 |
920 const size_t idn_hostname_offsets_2[] = { | 654 const size_t idn_hostname_offsets_2[] = { |
921 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, kNpos, kNpos, kNpos, kNpos, kNpos, | 655 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, kNpos, kNpos, kNpos, kNpos, kNpos, |
922 kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 14, 15, kNpos, kNpos, kNpos, | 656 kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 14, 15, kNpos, kNpos, kNpos, |
923 kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, | 657 kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, |
924 kNpos, 19, 20, 21, 22, 23, 24 | 658 kNpos, 19, 20, 21, 22, 23, 24 |
925 }; | 659 }; |
926 // Convert punycode to | 660 // Convert punycode to |
927 // "http://test.\x89c6\x9891.\x5317\x4eac\x5927\x5b78.test/". | 661 // "http://test.\x89c6\x9891.\x5317\x4eac\x5927\x5b78.test/". |
928 CheckAdjustedOffsets("http://test.xn--cy2a840a.xn--1lq90ic7f1rc.test/", | 662 CheckAdjustedOffsets("http://test.xn--cy2a840a.xn--1lq90ic7f1rc.test/", |
929 "zh-CN", kFormatUrlOmitNothing, | 663 kFormatUrlOmitNothing, |
930 net::UnescapeRule::NORMAL, idn_hostname_offsets_2); | 664 net::UnescapeRule::NORMAL, idn_hostname_offsets_2); |
931 | 665 |
932 const size_t unescape_offsets[] = { | 666 const size_t unescape_offsets[] = { |
933 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, | 667 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, |
934 21, 22, 23, 24, 25, kNpos, kNpos, 26, 27, 28, 29, 30, kNpos, kNpos, kNpos, | 668 21, 22, 23, 24, 25, kNpos, kNpos, 26, 27, 28, 29, 30, kNpos, kNpos, kNpos, |
935 kNpos, kNpos, kNpos, kNpos, kNpos, 31, kNpos, kNpos, kNpos, kNpos, kNpos, | 669 kNpos, kNpos, kNpos, kNpos, kNpos, 31, kNpos, kNpos, kNpos, kNpos, kNpos, |
936 kNpos, kNpos, kNpos, 32, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, | 670 kNpos, kNpos, kNpos, 32, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, |
937 kNpos, 33, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos | 671 kNpos, 33, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos |
938 }; | 672 }; |
939 // Unescape to "http://www.google.com/foo bar/\x30B0\x30FC\x30B0\x30EB". | 673 // Unescape to "http://www.google.com/foo bar/\x30B0\x30FC\x30B0\x30EB". |
940 CheckAdjustedOffsets( | 674 CheckAdjustedOffsets( |
941 "http://www.google.com/foo%20bar/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", | 675 "http://www.google.com/foo%20bar/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", |
942 "en", kFormatUrlOmitNothing, net::UnescapeRule::SPACES, unescape_offsets); | 676 kFormatUrlOmitNothing, net::UnescapeRule::SPACES, unescape_offsets); |
943 | 677 |
944 const size_t ref_offsets[] = { | 678 const size_t ref_offsets[] = { |
945 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, | 679 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, |
946 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, kNpos, kNpos, 32, kNpos, kNpos, | 680 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, kNpos, kNpos, 32, kNpos, kNpos, |
947 33 | 681 33 |
948 }; | 682 }; |
949 // Unescape to "http://www.google.com/foo.html#\x30B0\x30B0z". | 683 // Unescape to "http://www.google.com/foo.html#\x30B0\x30B0z". |
950 CheckAdjustedOffsets( | 684 CheckAdjustedOffsets( |
951 "http://www.google.com/foo.html#\xE3\x82\xB0\xE3\x82\xB0z", "en", | 685 "http://www.google.com/foo.html#\xE3\x82\xB0\xE3\x82\xB0z", |
952 kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, ref_offsets); | 686 kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, ref_offsets); |
953 | 687 |
954 const size_t omit_http_offsets[] = { | 688 const size_t omit_http_offsets[] = { |
955 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, | 689 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, |
956 10, 11, 12, 13, 14 | 690 10, 11, 12, 13, 14 |
957 }; | 691 }; |
958 CheckAdjustedOffsets("http://www.google.com/", "en", kFormatUrlOmitHTTP, | 692 CheckAdjustedOffsets("http://www.google.com/", kFormatUrlOmitHTTP, |
959 net::UnescapeRule::NORMAL, omit_http_offsets); | 693 net::UnescapeRule::NORMAL, omit_http_offsets); |
960 | 694 |
961 const size_t omit_http_start_with_ftp_offsets[] = { | 695 const size_t omit_http_start_with_ftp_offsets[] = { |
962 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21 | 696 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21 |
963 }; | 697 }; |
964 CheckAdjustedOffsets("http://ftp.google.com/", "en", kFormatUrlOmitHTTP, | 698 CheckAdjustedOffsets("http://ftp.google.com/", kFormatUrlOmitHTTP, |
965 net::UnescapeRule::NORMAL, | 699 net::UnescapeRule::NORMAL, |
966 omit_http_start_with_ftp_offsets); | 700 omit_http_start_with_ftp_offsets); |
967 | 701 |
968 const size_t omit_all_offsets[] = { | 702 const size_t omit_all_offsets[] = { |
969 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, kNpos, kNpos, kNpos, kNpos, | 703 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, kNpos, kNpos, kNpos, kNpos, |
970 0, 1, 2, 3, 4, 5, 6, 7 | 704 0, 1, 2, 3, 4, 5, 6, 7 |
971 }; | 705 }; |
972 CheckAdjustedOffsets("http://user@foo.com/", "en", kFormatUrlOmitAll, | 706 CheckAdjustedOffsets("http://user@foo.com/", kFormatUrlOmitAll, |
973 net::UnescapeRule::NORMAL, omit_all_offsets); | 707 net::UnescapeRule::NORMAL, omit_all_offsets); |
974 } | 708 } |
975 | 709 |
976 } // namespace | 710 } // namespace |
977 | 711 |
978 } // namespace url_formatter | 712 } // namespace url_formatter |
OLD | NEW |