Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(538)

Side by Side Diff: components/url_formatter/url_formatter_unittest.cc

Issue 1258813002: Implement a new IDN display policy (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: typo fix Created 5 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2015 The Chromium Authors. All rights reserved. 1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/url_formatter/url_formatter.h" 5 #include "components/url_formatter/url_formatter.h"
6 6
7 #include <string.h> 7 #include <string.h>
8 8
9 #include <vector> 9 #include <vector>
10 10
11 #include "base/macros.h" 11 #include "base/macros.h"
12 #include "base/strings/string_number_conversions.h" 12 #include "base/strings/string_number_conversions.h"
13 #include "base/strings/stringprintf.h" 13 #include "base/strings/stringprintf.h"
14 #include "base/strings/utf_string_conversions.h" 14 #include "base/strings/utf_string_conversions.h"
15 #include "testing/gtest/include/gtest/gtest.h" 15 #include "testing/gtest/include/gtest/gtest.h"
16 #include "url/gurl.h" 16 #include "url/gurl.h"
17 17
18 18
19 namespace url_formatter { 19 namespace url_formatter {
20 20
21 namespace { 21 namespace {
22 22
23 using base::WideToUTF16; 23 using base::WideToUTF16;
24 using base::ASCIIToUTF16; 24 using base::ASCIIToUTF16;
25 25
26 const size_t kNpos = base::string16::npos; 26 const size_t kNpos = base::string16::npos;
27 27
28 const char* const kLanguages[] = {
29 "", "en", "zh-CN", "ja", "ko",
30 "he", "ar", "ru", "el", "fr",
31 "de", "pt", "sv", "th", "hi",
32 "de,en", "el,en", "zh-TW,en", "ko,ja", "he,ru,en",
33 "zh,ru,en"
34 };
35
36 struct IDNTestCase { 28 struct IDNTestCase {
37 const char* const input; 29 const char* const input;
38 const wchar_t* unicode_output; 30 const wchar_t* unicode_output;
39 const bool unicode_allowed[arraysize(kLanguages)]; 31 const bool unicode_allowed;
40 }; 32 };
41 33
42 // TODO(jungshik) This is just a random sample of languages and is far 34 // TODO(jungshik) This is just a random sample of languages and is far
43 // from exhaustive. We may have to generate all the combinations 35 // from exhaustive. We may have to generate all the combinations
44 // of languages (powerset of a set of all the languages). 36 // of languages (powerset of a set of all the languages).
45 const IDNTestCase idn_cases[] = { 37 const IDNTestCase idn_cases[] = {
46 // No IDN 38 // No IDN
47 {"www.google.com", L"www.google.com", 39 {"www.google.com", L"www.google.com", true},
48 {true, true, true, true, true, 40 {"www.google.com.", L"www.google.com.", true},
49 true, true, true, true, true, 41 {".", L".", true},
50 true, true, true, true, true, 42 {"", L"", true},
51 true, true, true, true, true, 43 // IDN
52 true}}, 44 // Hanzi (Traditional Chinese)
53 {"www.google.com.", L"www.google.com.", 45 {"xn--1lq90ic7f1rc.cn", L"\x5317\x4eac\x5927\x5b78.cn", true},
54 {true, true, true, true, true, 46 // Hanzi ('video' in Simplified Chinese)
55 true, true, true, true, true, 47 {"xn--cy2a840a.com", L"\x89c6\x9891.com", true},
56 true, true, true, true, true, 48 // Hanzi + '123'
57 true, true, true, true, true, 49 {"www.xn--123-p18d.com",
58 true}}, 50 L"www.\x4e00"
59 {".", L".", 51 L"123.com",
60 {true, true, true, true, true, 52 true},
61 true, true, true, true, true, 53 // Hanzi + Latin : U+56FD is simplified
62 true, true, true, true, true, 54 {"www.xn--hello-9n1hm04c.com", L"www.hello\x4e2d\x56fd.com", true},
63 true, true, true, true, true, 55 // Kanji + Kana (Japanese)
64 true}}, 56 {"xn--l8jvb1ey91xtjb.jp", L"\x671d\x65e5\x3042\x3055\x3072.jp", true},
65 {"", L"", 57 // Katakana including U+30FC
66 {true, true, true, true, true, 58 {"xn--tckm4i2e.jp", L"\x30b3\x30de\x30fc\x30b9.jp", true},
67 true, true, true, true, true, 59 {"xn--3ck7a7g.jp", L"\u30ce\u30f3\u30bd.jp", true},
68 true, true, true, true, true, 60 // Katakana + Latin (Japanese)
69 true, true, true, true, true, 61 {"xn--e-efusa1mzf.jp", L"e\x30b3\x30de\x30fc\x30b9.jp", true},
70 true}}, 62 {"xn--3bkxe.jp", L"\x30c8\x309a.jp", true},
71 // IDN 63 // Hangul (Korean)
72 // Hanzi (Traditional Chinese) 64 {"www.xn--or3b17p6jjc.kr", L"www.\xc804\xc790\xc815\xbd80.kr", true},
73 {"xn--1lq90ic7f1rc.cn", L"\x5317\x4eac\x5927\x5b78.cn", 65 // b<u-umlaut>cher (German)
74 {true, false, true, true, false, 66 {"xn--bcher-kva.de",
75 false, false, false, false, false, 67 L"b\x00fc"
76 false, false, false, false, false, 68 L"cher.de",
77 false, false, true, true, false, 69 true},
78 true}}, 70 // a with diaeresis
79 // Hanzi ('video' in Simplified Chinese : will pass only in zh-CN,zh) 71 {"www.xn--frgbolaget-q5a.se", L"www.f\x00e4rgbolaget.se", true},
80 {"xn--cy2a840a.com", L"\x89c6\x9891.com", 72 // c-cedilla (French)
81 {true, false, true, false, false, 73 {"www.xn--alliancefranaise-npb.fr",
82 false, false, false, false, false, 74 L"www.alliancefran\x00e7"
83 false, false, false, false, false, 75 L"aise.fr",
84 false, false, false, false, false, 76 true},
85 true}}, 77 // caf'e with acute accent' (French)
86 // Hanzi + '123' 78 {"xn--caf-dma.fr", L"caf\x00e9.fr", true},
87 {"www.xn--123-p18d.com", L"www.\x4e00" L"123.com", 79 // c-cedilla and a with tilde (Portuguese)
88 {true, false, true, true, false, 80 {"xn--poema-9qae5a.com.br", L"p\x00e3oema\x00e7\x00e3.com.br", true},
89 false, false, false, false, false, 81 // s with caron
90 false, false, false, false, false, 82 {"xn--achy-f6a.com",
91 false, false, true, true, false, 83 L"\x0161"
92 true}}, 84 L"achy.com",
93 // Hanzi + Latin : U+56FD is simplified and is regarded 85 true},
94 // as not supported in zh-TW. 86 {"xn--kxae4bafwg.gr", L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1.gr",
95 {"www.xn--hello-9n1hm04c.com", L"www.hello\x4e2d\x56fd.com", 87 true},
96 {false, false, true, true, false, 88 // Eutopia + 123 (Greek)
97 false, false, false, false, false, 89 {"xn---123-pldm0haj2bk.gr",
98 false, false, false, false, false, 90 L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1-123.gr", true},
99 false, false, false, true, false, 91 // Cyrillic (Russian)
100 true}}, 92 {"xn--n1aeec9b.ru", L"\x0442\x043e\x0440\x0442\x044b.ru", true},
101 // Kanji + Kana (Japanese) 93 // Cyrillic + 123 (Russian)
102 {"xn--l8jvb1ey91xtjb.jp", L"\x671d\x65e5\x3042\x3055\x3072.jp", 94 {"xn---123-45dmmc5f.ru", L"\x0442\x043e\x0440\x0442\x044b-123.ru", true},
103 {true, false, false, true, false, 95 // Arabic
104 false, false, false, false, false, 96 {"xn--mgba1fmg.eg", L"\x0627\x0641\x0644\x0627\x0645.eg", true},
105 false, false, false, false, false, 97 // Hebrew
106 false, false, false, true, false, 98 {"xn--4dbib.he", L"\x05d5\x05d0\x05d4.he", true},
107 false}}, 99 // Thai
108 // Katakana including U+30FC 100 {"xn--12c2cc4ag3b4ccu.th",
109 {"xn--tckm4i2e.jp", L"\x30b3\x30de\x30fc\x30b9.jp", 101 L"\x0e2a\x0e32\x0e22\x0e01\x0e32\x0e23\x0e1a\x0e34\x0e19.th", true},
110 {true, false, false, true, false, 102 // Devanagari (Hindi)
111 false, false, false, false, false, 103 {"www.xn--l1b6a9e1b7c.in", L"www.\x0905\x0915\x094b\x0932\x093e.in", true},
112 false, false, false, false, false, 104 // Invalid IDN
113 false, false, false, true, false, 105 {"xn--hello?world.com", NULL, false},
114 }}, 106 // Unsafe IDNs
115 {"xn--3ck7a7g.jp", L"\u30ce\u30f3\u30bd.jp", 107 // "payp<alpha>l.com"
116 {true, false, false, true, false, 108 {"www.xn--paypl-g9d.com", L"payp\x03b1l.com", false},
117 false, false, false, false, false, 109 // google.gr with Greek omicron and epsilon
118 false, false, false, false, false, 110 {"xn--ggl-6xc1ca.gr", L"g\x03bf\x03bfgl\x03b5.gr", false},
119 false, false, false, true, false, 111 // google.ru with Cyrillic o
120 }}, 112 {"xn--ggl-tdd6ba.ru", L"g\x043e\x043egl\x0435.ru", false},
121 // Katakana + Latin (Japanese) 113 // h<e with acute>llo<China in Han>.cn
122 // TODO(jungshik): Change 'false' in the first element to 'true' 114 {"xn--hllo-bpa7979ih5m.cn", L"h\x00e9llo\x4e2d\x56fd.cn", false},
123 // after upgrading to ICU 4.2.1 to use new uspoof_* APIs instead 115 // <Greek rho><Cyrillic a><Cyrillic u>.ru
124 // of our IsIDNComponentInSingleScript(). 116 {"xn--2xa6t2b.ru", L"\x03c1\x0430\x0443.ru", false},
125 {"xn--e-efusa1mzf.jp", L"e\x30b3\x30de\x30fc\x30b9.jp", 117 // One that's really long that will force a buffer realloc
126 {false, false, false, true, false, 118 {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
127 false, false, false, false, false, 119 "aaaaaaa",
128 false, false, false, false, false, 120 L"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
129 false, false, false, true, false, 121 L"aaaaaaaa",
130 }}, 122 true},
131 {"xn--3bkxe.jp", L"\x30c8\x309a.jp", 123 {"xn--osd3820f24c.kr", L"\xac00\xb098\x115f.kr", false},
132 {false, false, false, true, false, 124 {"www.xn--google-ho0coa.com", L"www.\x2039google\x203a.com", false},
133 false, false, false, false, false, 125 // Test cases for characters we blacklisted although allowed in IDN.
134 false, false, false, false, false, 126 {"google.xn--comabc-k8d",
135 false, false, false, true, false, 127 L"google.com\x0338"
136 }}, 128 L"abc",
137 // Hangul (Korean) 129 false},
138 {"www.xn--or3b17p6jjc.kr", L"www.\xc804\xc790\xc815\xbd80.kr", 130 {"google.xn--com-oh4ba.evil.jp", L"google.com\x309a\x309a.evil.jp", false},
139 {true, false, false, false, true, 131 {"google.xn--comevil-v04f.jp",
140 false, false, false, false, false, 132 L"google.com\x30ce"
141 false, false, false, false, false, 133 L"evil.jp",
142 false, false, false, true, false, 134 false},
143 false}}, 135 // Padlock icon spoof.
144 // b<u-umlaut>cher (German) 136 {"xn--google-hj64e", L"\U0001f512google.com", false},
145 {"xn--bcher-kva.de", L"b\x00fc" L"cher.de", 137 // Ensure that blacklisting "\xd83d\xdd12" did not inadvertently blacklist
146 {true, false, false, false, false, 138 // all strings with the surrogate '\xdd12'.
147 false, false, false, false, true, 139 {"xn--fk9c.com", L"\U00010912.com", false},
148 true, false, false, false, false, 140 {"xn--g6h.com", L"\x2665.com", true},
149 true, false, false, false, false, 141 {"xn--2ci.com", L"\x272a.com", true},
150 false}},
151 // a with diaeresis
152 {"www.xn--frgbolaget-q5a.se", L"www.f\x00e4rgbolaget.se",
153 {true, false, false, false, false,
154 false, false, false, false, false,
155 true, false, true, false, false,
156 true, false, false, false, false,
157 false}},
158 // c-cedilla (French)
159 {"www.xn--alliancefranaise-npb.fr", L"www.alliancefran\x00e7" L"aise.fr",
160 {true, false, false, false, false,
161 false, false, false, false, true,
162 false, true, false, false, false,
163 false, false, false, false, false,
164 false}},
165 // caf'e with acute accent' (French)
166 {"xn--caf-dma.fr", L"caf\x00e9.fr",
167 {true, false, false, false, false,
168 false, false, false, false, true,
169 false, true, true, false, false,
170 false, false, false, false, false,
171 false}},
172 // c-cedilla and a with tilde (Portuguese)
173 {"xn--poema-9qae5a.com.br", L"p\x00e3oema\x00e7\x00e3.com.br",
174 {true, false, false, false, false,
175 false, false, false, false, false,
176 false, true, false, false, false,
177 false, false, false, false, false,
178 false}},
179 // s with caron
180 {"xn--achy-f6a.com", L"\x0161" L"achy.com",
181 {true, false, false, false, false,
182 false, false, false, false, false,
183 false, false, false, false, false,
184 false, false, false, false, false,
185 false}},
186 // TODO(jungshik) : Add examples with Cyrillic letters
187 // only used in some languages written in Cyrillic.
188 // Eutopia (Greek)
189 {"xn--kxae4bafwg.gr", L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1.gr",
190 {true, false, false, false, false,
191 false, false, false, true, false,
192 false, false, false, false, false,
193 false, true, false, false, false,
194 false}},
195 // Eutopia + 123 (Greek)
196 {"xn---123-pldm0haj2bk.gr",
197 L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1-123.gr",
198 {true, false, false, false, false,
199 false, false, false, true, false,
200 false, false, false, false, false,
201 false, true, false, false, false,
202 false}},
203 // Cyrillic (Russian)
204 {"xn--n1aeec9b.ru", L"\x0442\x043e\x0440\x0442\x044b.ru",
205 {true, false, false, false, false,
206 false, false, true, false, false,
207 false, false, false, false, false,
208 false, false, false, false, true,
209 true}},
210 // Cyrillic + 123 (Russian)
211 {"xn---123-45dmmc5f.ru", L"\x0442\x043e\x0440\x0442\x044b-123.ru",
212 {true, false, false, false, false,
213 false, false, true, false, false,
214 false, false, false, false, false,
215 false, false, false, false, true,
216 true}},
217 // Arabic
218 {"xn--mgba1fmg.ar", L"\x0627\x0641\x0644\x0627\x0645.ar",
219 {true, false, false, false, false,
220 false, true, false, false, false,
221 false, false, false, false, false,
222 false, false, false, false, false,
223 false}},
224 // Hebrew
225 {"xn--4dbib.he", L"\x05d5\x05d0\x05d4.he",
226 {true, false, false, false, false,
227 true, false, false, false, false,
228 false, false, false, false, false,
229 false, false, false, false, true,
230 false}},
231 // Thai
232 {"xn--12c2cc4ag3b4ccu.th",
233 L"\x0e2a\x0e32\x0e22\x0e01\x0e32\x0e23\x0e1a\x0e34\x0e19.th",
234 {true, false, false, false, false,
235 false, false, false, false, false,
236 false, false, false, true, false,
237 false, false, false, false, false,
238 false}},
239 // Devanagari (Hindi)
240 {"www.xn--l1b6a9e1b7c.in", L"www.\x0905\x0915\x094b\x0932\x093e.in",
241 {true, false, false, false, false,
242 false, false, false, false, false,
243 false, false, false, false, true,
244 false, false, false, false, false,
245 false}},
246 // Invalid IDN
247 {"xn--hello?world.com", NULL,
248 {false, false, false, false, false,
249 false, false, false, false, false,
250 false, false, false, false, false,
251 false, false, false, false, false,
252 false}},
253 // Unsafe IDNs
254 // "payp<alpha>l.com"
255 {"www.xn--paypl-g9d.com", L"payp\x03b1l.com",
256 {false, false, false, false, false,
257 false, false, false, false, false,
258 false, false, false, false, false,
259 false, false, false, false, false,
260 false}},
261 // google.gr with Greek omicron and epsilon
262 {"xn--ggl-6xc1ca.gr", L"g\x03bf\x03bfgl\x03b5.gr",
263 {false, false, false, false, false,
264 false, false, false, false, false,
265 false, false, false, false, false,
266 false, false, false, false, false,
267 false}},
268 // google.ru with Cyrillic o
269 {"xn--ggl-tdd6ba.ru", L"g\x043e\x043egl\x0435.ru",
270 {false, false, false, false, false,
271 false, false, false, false, false,
272 false, false, false, false, false,
273 false, false, false, false, false,
274 false}},
275 // h<e with acute>llo<China in Han>.cn
276 {"xn--hllo-bpa7979ih5m.cn", L"h\x00e9llo\x4e2d\x56fd.cn",
277 {false, false, false, false, false,
278 false, false, false, false, false,
279 false, false, false, false, false,
280 false, false, false, false, false,
281 false}},
282 // <Greek rho><Cyrillic a><Cyrillic u>.ru
283 {"xn--2xa6t2b.ru", L"\x03c1\x0430\x0443.ru",
284 {false, false, false, false, false,
285 false, false, false, false, false,
286 false, false, false, false, false,
287 false, false, false, false, false,
288 false}},
289 // One that's really long that will force a buffer realloc
290 {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
291 "aaaaaaa",
292 L"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
293 L"aaaaaaaa",
294 {true, true, true, true, true,
295 true, true, true, true, true,
296 true, true, true, true, true,
297 true, true, true, true, true,
298 true}},
299 // Test cases for characters we blacklisted although allowed in IDN.
300 // Embedded spaces will be turned to %20 in the display.
301 // TODO(jungshik): We need to have more cases. This is a typical
302 // data-driven trap. The following test cases need to be separated
303 // and tested only for a couple of languages.
304 {"xn--osd3820f24c.kr", L"\xac00\xb098\x115f.kr",
305 {false, false, false, false, false,
306 false, false, false, false, false,
307 false, false, false, false, false,
308 false, false, false, false, false,
309 false}},
310 {"www.xn--google-ho0coa.com", L"www.\x2039google\x203a.com",
311 {false, false, false, false, false,
312 false, false, false, false, false,
313 false, false, false, false, false,
314 false, false, false, false, false,
315 }},
316 {"google.xn--comabc-k8d", L"google.com\x0338" L"abc",
317 {false, false, false, false, false,
318 false, false, false, false, false,
319 false, false, false, false, false,
320 false, false, false, false, false,
321 }},
322 {"google.xn--com-oh4ba.evil.jp", L"google.com\x309a\x309a.evil.jp",
323 {false, false, false, false, false,
324 false, false, false, false, false,
325 false, false, false, false, false,
326 false, false, false, false, false,
327 }},
328 {"google.xn--comevil-v04f.jp", L"google.com\x30ce" L"evil.jp",
329 {false, false, false, false, false,
330 false, false, false, false, false,
331 false, false, false, false, false,
332 false, false, false, false, false,
333 }},
334 // Padlock icon spoof.
335 {"xn--google-hj64e", L"\U0001f512google.com",
336 {false, false, false, false, false,
337 false, false, false, false, false,
338 false, false, false, false, false,
339 false, false, false, false, false,
340 }},
341 // Ensure that blacklisting "\xd83d\xdd12" did not inadvertently blacklist
342 // all strings with the surrogate '\xdd12'.
343 {"xn--fk9c.com", L"\U00010912.com",
344 {true, false, false, false, false,
345 false, false, false, false, false,
346 false, false, false, false, false,
347 false, false, false, false, false,
348 }},
349 #if 0 142 #if 0
350 // These two cases are special. We need a separate test. 143 // These two cases are special. We need a separate test.
351 // U+3000 and U+3002 are normalized to ASCII space and dot. 144 // U+3000 and U+3002 are normalized to ASCII space and dot.
352 {"xn-- -kq6ay5z.cn", L"\x4e2d\x56fd\x3000.cn", 145 {"xn-- -kq6ay5z.cn", L"\x4e2d\x56fd\x3000.cn",
353 {false, false, true, false, false, 146 {false, false, true, false, false,
354 false, false, false, false, false, 147 false, false, false, false, false,
355 false, false, false, false, false, 148 false, false, false, false, false,
356 false, false, true, false, false, 149 false, false, true, false, false,
357 true}}, 150 true}},
358 {"xn--fiqs8s.cn", L"\x4e2d\x56fd\x3002" L"cn", 151 {"xn--fiqs8s.cn", L"\x4e2d\x56fd\x3002" L"cn",
359 {false, false, true, false, false, 152 {false, false, true, false, false,
360 false, false, false, false, false, 153 false, false, false, false, false,
361 false, false, false, false, false, 154 false, false, false, false, false,
362 false, false, true, false, false, 155 false, false, true, false, false,
363 true}}, 156 true}},
364 #endif 157 #endif
365 }; 158 };
366 159
367 struct AdjustOffsetCase { 160 struct AdjustOffsetCase {
368 size_t input_offset; 161 size_t input_offset;
369 size_t output_offset; 162 size_t output_offset;
370 }; 163 };
371 164
372 struct UrlTestData { 165 struct UrlTestData {
373 const char* const description; 166 const char* const description;
374 const char* const input; 167 const char* const input;
375 const char* const languages;
376 FormatUrlTypes format_types; 168 FormatUrlTypes format_types;
377 net::UnescapeRule::Type escape_rules; 169 net::UnescapeRule::Type escape_rules;
378 const wchar_t* output; // Use |wchar_t| to handle Unicode constants easily. 170 const wchar_t* output; // Use |wchar_t| to handle Unicode constants easily.
379 size_t prefix_len; 171 size_t prefix_len;
380 }; 172 };
381 173
382 // A helper for IDN*{Fast,Slow}.
383 // Append "::<language list>" to |expected| and |actual| to make it
384 // easy to tell which sub-case fails without debugging.
385 void AppendLanguagesToOutputs(const char* languages,
386 base::string16* expected,
387 base::string16* actual) {
388 base::string16 to_append = ASCIIToUTF16("::") + ASCIIToUTF16(languages);
389 expected->append(to_append);
390 actual->append(to_append);
391 }
392
393 // A pair of helpers for the FormatUrlWithOffsets() test. 174 // A pair of helpers for the FormatUrlWithOffsets() test.
394 void VerboseExpect(size_t expected, 175 void VerboseExpect(size_t expected,
395 size_t actual, 176 size_t actual,
396 const std::string& original_url, 177 const std::string& original_url,
397 size_t position, 178 size_t position,
398 const base::string16& formatted_url) { 179 const base::string16& formatted_url) {
399 EXPECT_EQ(expected, actual) << "Original URL: " << original_url 180 EXPECT_EQ(expected, actual) << "Original URL: " << original_url
400 << " (at char " << position << ")\nFormatted URL: " << formatted_url; 181 << " (at char " << position << ")\nFormatted URL: " << formatted_url;
401 } 182 }
402 183
403 void CheckAdjustedOffsets(const std::string& url_string, 184 void CheckAdjustedOffsets(const std::string& url_string,
404 const std::string& languages,
405 FormatUrlTypes format_types, 185 FormatUrlTypes format_types,
406 net::UnescapeRule::Type unescape_rules, 186 net::UnescapeRule::Type unescape_rules,
407 const size_t* output_offsets) { 187 const size_t* output_offsets) {
408 GURL url(url_string); 188 GURL url(url_string);
409 size_t url_length = url_string.length(); 189 size_t url_length = url_string.length();
410 std::vector<size_t> offsets; 190 std::vector<size_t> offsets;
411 for (size_t i = 0; i <= url_length + 1; ++i) 191 for (size_t i = 0; i <= url_length + 1; ++i)
412 offsets.push_back(i); 192 offsets.push_back(i);
413 offsets.push_back(500000); // Something larger than any input length. 193 offsets.push_back(500000); // Something larger than any input length.
414 offsets.push_back(std::string::npos); 194 offsets.push_back(std::string::npos);
415 base::string16 formatted_url = FormatUrlWithOffsets(url, languages, 195 base::string16 formatted_url = FormatUrlWithOffsets(url, std::string(),
416 format_types, unescape_rules, NULL, NULL, &offsets); 196 format_types, unescape_rules, NULL, NULL, &offsets);
417 for (size_t i = 0; i < url_length; ++i) 197 for (size_t i = 0; i < url_length; ++i)
418 VerboseExpect(output_offsets[i], offsets[i], url_string, i, formatted_url); 198 VerboseExpect(output_offsets[i], offsets[i], url_string, i, formatted_url);
419 VerboseExpect(formatted_url.length(), offsets[url_length], url_string, 199 VerboseExpect(formatted_url.length(), offsets[url_length], url_string,
420 url_length, formatted_url); 200 url_length, formatted_url);
421 VerboseExpect(base::string16::npos, offsets[url_length + 1], url_string, 201 VerboseExpect(base::string16::npos, offsets[url_length + 1], url_string,
422 500000, formatted_url); 202 500000, formatted_url);
423 VerboseExpect(base::string16::npos, offsets[url_length + 2], url_string, 203 VerboseExpect(base::string16::npos, offsets[url_length + 2], url_string,
424 std::string::npos, formatted_url); 204 std::string::npos, formatted_url);
425 } 205 }
426 206
427 TEST(UrlFormatterTest, IDNToUnicodeFast) { 207 TEST(UrlFormatterTest, IDNToUnicode) {
428 for (size_t i = 0; i < arraysize(idn_cases); i++) { 208 for (size_t i = 0; i < arraysize(idn_cases); i++) {
429 for (size_t j = 0; j < arraysize(kLanguages); j++) { 209 base::string16 output(IDNToUnicode(idn_cases[i].input, std::string()));
430 // ja || zh-TW,en || ko,ja -> IDNToUnicodeSlow 210 base::string16 expected(idn_cases[i].unicode_allowed
431 if (j == 3 || j == 17 || j == 18) 211 ? WideToUTF16(idn_cases[i].unicode_output)
432 continue; 212 : ASCIIToUTF16(idn_cases[i].input));
433 base::string16 output(IDNToUnicode(idn_cases[i].input, kLanguages[j])); 213 EXPECT_EQ(expected, output) << "input # " << i << ": \""
434 base::string16 expected(idn_cases[i].unicode_allowed[j] ? 214 << idn_cases[i].input << "\"";
435 WideToUTF16(idn_cases[i].unicode_output) :
436 ASCIIToUTF16(idn_cases[i].input));
437 AppendLanguagesToOutputs(kLanguages[j], &expected, &output);
438 EXPECT_EQ(expected, output) << "input: \"" << idn_cases[i].input
439 << "\", languages: \"" << kLanguages[j]
440 << "\"";
441 }
442 }
443 }
444
445 TEST(UrlFormatterTest, IDNToUnicodeSlow) {
446 for (size_t i = 0; i < arraysize(idn_cases); i++) {
447 for (size_t j = 0; j < arraysize(kLanguages); j++) {
448 // !(ja || zh-TW,en || ko,ja) -> IDNToUnicodeFast
449 if (!(j == 3 || j == 17 || j == 18))
450 continue;
451 base::string16 output(IDNToUnicode(idn_cases[i].input, kLanguages[j]));
452 base::string16 expected(idn_cases[i].unicode_allowed[j] ?
453 WideToUTF16(idn_cases[i].unicode_output) :
454 ASCIIToUTF16(idn_cases[i].input));
455 AppendLanguagesToOutputs(kLanguages[j], &expected, &output);
456 EXPECT_EQ(expected, output) << "input: \"" << idn_cases[i].input
457 << "\", languages: \"" << kLanguages[j]
458 << "\"";
459 }
460 }
461 }
462
463 // ulocdata_getExemplarSet may fail with some locales (currently bn, gu, and
464 // te), which was causing a crash (See http://crbug.com/510551). This may be an
465 // icu bug, but regardless, that should not cause a crash.
466 TEST(UrlFormatterTest, IDNToUnicodeNeverCrashes) {
467 for (char c1 = 'a'; c1 <= 'z'; c1++) {
468 for (char c2 = 'a'; c2 <= 'z'; c2++) {
469 std::string lang = base::StringPrintf("%c%c", c1, c2);
470 base::string16 output(IDNToUnicode("xn--74h", lang));
471 }
472 } 215 }
473 } 216 }
474 217
475 TEST(UrlFormatterTest, FormatUrl) { 218 TEST(UrlFormatterTest, FormatUrl) {
476 FormatUrlTypes default_format_type = kFormatUrlOmitUsernamePassword; 219 FormatUrlTypes default_format_type = kFormatUrlOmitUsernamePassword;
477 const UrlTestData tests[] = { 220 const UrlTestData tests[] = {
478 {"Empty URL", "", "", default_format_type, net::UnescapeRule::NORMAL, L"", 221 {"Empty URL", "", default_format_type, net::UnescapeRule::NORMAL, L"",
479 0}, 222 0},
480 223
481 {"Simple URL", "http://www.google.com/", "", default_format_type, 224 {"Simple URL", "http://www.google.com/", default_format_type,
482 net::UnescapeRule::NORMAL, L"http://www.google.com/", 7}, 225 net::UnescapeRule::NORMAL, L"http://www.google.com/", 7},
483 226
484 {"With a port number and a reference", 227 {"With a port number and a reference",
485 "http://www.google.com:8080/#\xE3\x82\xB0", "", default_format_type, 228 "http://www.google.com:8080/#\xE3\x82\xB0", default_format_type,
486 net::UnescapeRule::NORMAL, L"http://www.google.com:8080/#\x30B0", 7}, 229 net::UnescapeRule::NORMAL, L"http://www.google.com:8080/#\x30B0", 7},
487 230
488 // -------- IDN tests -------- 231 // -------- IDN tests --------
489 {"Japanese IDN with ja", "http://xn--l8jvb1ey91xtjb.jp", "ja", 232 {"Japanese IDN with ja", "http://xn--l8jvb1ey91xtjb.jp",
490 default_format_type, net::UnescapeRule::NORMAL, 233 default_format_type, net::UnescapeRule::NORMAL,
491 L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7}, 234 L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7},
492 235
493 {"Japanese IDN with en", "http://xn--l8jvb1ey91xtjb.jp", "en", 236 {"mailto: with Japanese IDN", "mailto:foo@xn--l8jvb1ey91xtjb.jp",
494 default_format_type, net::UnescapeRule::NORMAL,
495 L"http://xn--l8jvb1ey91xtjb.jp/", 7},
496
497 {"Japanese IDN without any languages", "http://xn--l8jvb1ey91xtjb.jp", "",
498 default_format_type, net::UnescapeRule::NORMAL,
499 // Single script is safe for empty languages.
500 L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7},
501
502 {"mailto: with Japanese IDN", "mailto:foo@xn--l8jvb1ey91xtjb.jp", "ja",
503 default_format_type, net::UnescapeRule::NORMAL, 237 default_format_type, net::UnescapeRule::NORMAL,
504 // GURL doesn't assume an email address's domain part as a host name. 238 // GURL doesn't assume an email address's domain part as a host name.
505 L"mailto:foo@xn--l8jvb1ey91xtjb.jp", 7}, 239 L"mailto:foo@xn--l8jvb1ey91xtjb.jp", 7},
506 240
507 {"file: with Japanese IDN", "file://xn--l8jvb1ey91xtjb.jp/config.sys", 241 {"file: with Japanese IDN", "file://xn--l8jvb1ey91xtjb.jp/config.sys",
508 "ja", default_format_type, net::UnescapeRule::NORMAL, 242 default_format_type, net::UnescapeRule::NORMAL,
509 L"file://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 7}, 243 L"file://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 7},
510 244
511 {"ftp: with Japanese IDN", "ftp://xn--l8jvb1ey91xtjb.jp/config.sys", "ja", 245 {"ftp: with Japanese IDN", "ftp://xn--l8jvb1ey91xtjb.jp/config.sys",
512 default_format_type, net::UnescapeRule::NORMAL, 246 default_format_type, net::UnescapeRule::NORMAL,
513 L"ftp://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 6}, 247 L"ftp://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 6},
514 248
515 // -------- omit_username_password flag tests -------- 249 // -------- omit_username_password flag tests --------
516 {"With username and password, omit_username_password=false", 250 {"With username and password, omit_username_password=false",
517 "http://user:passwd@example.com/foo", "", kFormatUrlOmitNothing, 251 "http://user:passwd@example.com/foo", kFormatUrlOmitNothing,
518 net::UnescapeRule::NORMAL, L"http://user:passwd@example.com/foo", 19}, 252 net::UnescapeRule::NORMAL, L"http://user:passwd@example.com/foo", 19},
519 253
520 {"With username and password, omit_username_password=true", 254 {"With username and password, omit_username_password=true",
521 "http://user:passwd@example.com/foo", "", default_format_type, 255 "http://user:passwd@example.com/foo", default_format_type,
522 net::UnescapeRule::NORMAL, L"http://example.com/foo", 7}, 256 net::UnescapeRule::NORMAL, L"http://example.com/foo", 7},
523 257
524 {"With username and no password", "http://user@example.com/foo", "", 258 {"With username and no password", "http://user@example.com/foo",
525 default_format_type, net::UnescapeRule::NORMAL, 259 default_format_type, net::UnescapeRule::NORMAL,
526 L"http://example.com/foo", 7}, 260 L"http://example.com/foo", 7},
527 261
528 {"Just '@' without username and password", "http://@example.com/foo", "", 262 {"Just '@' without username and password", "http://@example.com/foo",
529 default_format_type, net::UnescapeRule::NORMAL, 263 default_format_type, net::UnescapeRule::NORMAL,
530 L"http://example.com/foo", 7}, 264 L"http://example.com/foo", 7},
531 265
532 // GURL doesn't think local-part of an email address is username for URL. 266 // GURL doesn't think local-part of an email address is username for URL.
533 {"mailto:, omit_username_password=true", "mailto:foo@example.com", "", 267 {"mailto:, omit_username_password=true", "mailto:foo@example.com",
534 default_format_type, net::UnescapeRule::NORMAL, 268 default_format_type, net::UnescapeRule::NORMAL,
535 L"mailto:foo@example.com", 7}, 269 L"mailto:foo@example.com", 7},
536 270
537 // -------- unescape flag tests -------- 271 // -------- unescape flag tests --------
538 {"Do not unescape", 272 {"Do not unescape",
539 "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/" 273 "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/"
540 "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB" 274 "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"
541 "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", 275 "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB",
542 "en", default_format_type, net::UnescapeRule::NONE, 276 default_format_type, net::UnescapeRule::NONE,
543 // GURL parses %-encoded hostnames into Punycode. 277 // GURL parses %-encoded hostnames into Punycode.
544 L"http://xn--qcka1pmc.jp/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB" 278 L"http://\x30B0\x30FC\x30B0\x30EB.jp/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB "
545 L"?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", 279 L"?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB",
546 7}, 280 7},
547 281
548 {"Unescape normally", 282 {"Unescape normally",
549 "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/" 283 "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/"
550 "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB" 284 "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"
551 "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", 285 "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB",
552 "en", default_format_type, net::UnescapeRule::NORMAL, 286 default_format_type, net::UnescapeRule::NORMAL,
553 L"http://xn--qcka1pmc.jp/\x30B0\x30FC\x30B0\x30EB" 287 L"http://\x30B0\x30FC\x30B0\x30EB.jp/\x30B0\x30FC\x30B0\x30EB"
554 L"?q=\x30B0\x30FC\x30B0\x30EB", 288 L"?q=\x30B0\x30FC\x30B0\x30EB",
555 7}, 289 7},
556 290
557 {"Unescape normally with BiDi control character", 291 {"Unescape normally with BiDi control character",
558 "http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy", "en", 292 "http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy",
559 default_format_type, net::UnescapeRule::NORMAL, 293 default_format_type, net::UnescapeRule::NORMAL,
560 L"http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy", 7}, 294 L"http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy", 7},
561 295
562 {"Unescape normally including unescape spaces", 296 {"Unescape normally including unescape spaces",
563 "http://www.google.com/search?q=Hello%20World", "en", 297 "http://www.google.com/search?q=Hello%20World",
564 default_format_type, net::UnescapeRule::SPACES, 298 default_format_type, net::UnescapeRule::SPACES,
565 L"http://www.google.com/search?q=Hello World", 7}, 299 L"http://www.google.com/search?q=Hello World", 7},
566 300
567 /* 301 /*
568 {"unescape=true with some special characters", 302 {"unescape=true with some special characters",
569 "http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", "", 303 "http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z",
570 kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, 304 kFormatUrlOmitNothing, net::UnescapeRule::NORMAL,
571 L"http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", 25}, 305 L"http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", 25},
572 */ 306 */
573 // Disabled: the resultant URL becomes "...user%253A:%2540passwd...". 307 // Disabled: the resultant URL becomes "...user%253A:%2540passwd...".
574 308
575 // -------- omit http: -------- 309 // -------- omit http: --------
576 {"omit http with user name", "http://user@example.com/foo", "", 310 {"omit http with user name", "http://user@example.com/foo",
577 kFormatUrlOmitAll, net::UnescapeRule::NORMAL, L"example.com/foo", 0}, 311 kFormatUrlOmitAll, net::UnescapeRule::NORMAL, L"example.com/foo", 0},
578 312
579 {"omit http", "http://www.google.com/", "en", kFormatUrlOmitHTTP, 313 {"omit http", "http://www.google.com/", kFormatUrlOmitHTTP,
580 net::UnescapeRule::NORMAL, L"www.google.com/", 0}, 314 net::UnescapeRule::NORMAL, L"www.google.com/", 0},
581 315
582 {"omit http with https", "https://www.google.com/", "en", 316 {"omit http with https", "https://www.google.com/",
583 kFormatUrlOmitHTTP, net::UnescapeRule::NORMAL, 317 kFormatUrlOmitHTTP, net::UnescapeRule::NORMAL,
584 L"https://www.google.com/", 8}, 318 L"https://www.google.com/", 8},
585 319
586 {"omit http starts with ftp.", "http://ftp.google.com/", "en", 320 {"omit http starts with ftp.", "http://ftp.google.com/",
587 kFormatUrlOmitHTTP, net::UnescapeRule::NORMAL, L"http://ftp.google.com/", 321 kFormatUrlOmitHTTP, net::UnescapeRule::NORMAL, L"http://ftp.google.com/",
588 7}, 322 7},
589 323
590 // -------- omit trailing slash on bare hostname -------- 324 // -------- omit trailing slash on bare hostname --------
591 {"omit slash when it's the entire path", "http://www.google.com/", "en", 325 {"omit slash when it's the entire path", "http://www.google.com/",
592 kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL, 326 kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL,
593 L"http://www.google.com", 7}, 327 L"http://www.google.com", 7},
594 {"omit slash when there's a ref", "http://www.google.com/#ref", "en", 328 {"omit slash when there's a ref", "http://www.google.com/#ref",
595 kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL, 329 kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL,
596 L"http://www.google.com/#ref", 7}, 330 L"http://www.google.com/#ref", 7},
597 {"omit slash when there's a query", "http://www.google.com/?", "en", 331 {"omit slash when there's a query", "http://www.google.com/?",
598 kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL, 332 kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL,
599 L"http://www.google.com/?", 7}, 333 L"http://www.google.com/?", 7},
600 {"omit slash when it's not the entire path", "http://www.google.com/foo", 334 {"omit slash when it's not the entire path", "http://www.google.com/foo",
601 "en", kFormatUrlOmitTrailingSlashOnBareHostname, 335 kFormatUrlOmitTrailingSlashOnBareHostname,
602 net::UnescapeRule::NORMAL, L"http://www.google.com/foo", 7}, 336 net::UnescapeRule::NORMAL, L"http://www.google.com/foo", 7},
603 {"omit slash for nonstandard URLs", "data:/", "en", 337 {"omit slash for nonstandard URLs", "data:/",
604 kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL, 338 kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL,
605 L"data:/", 5}, 339 L"data:/", 5},
606 {"omit slash for file URLs", "file:///", "en", 340 {"omit slash for file URLs", "file:///",
607 kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL, 341 kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL,
608 L"file:///", 7}, 342 L"file:///", 7},
609 343
610 // -------- view-source: -------- 344 // -------- view-source: --------
611 {"view-source", "view-source:http://xn--qcka1pmc.jp/", "ja", 345 {"view-source", "view-source:http://xn--qcka1pmc.jp/",
612 default_format_type, net::UnescapeRule::NORMAL, 346 default_format_type, net::UnescapeRule::NORMAL,
613 L"view-source:http://\x30B0\x30FC\x30B0\x30EB.jp/", 19}, 347 L"view-source:http://\x30B0\x30FC\x30B0\x30EB.jp/", 19},
614 348
615 {"view-source of view-source", 349 {"view-source of view-source",
616 "view-source:view-source:http://xn--qcka1pmc.jp/", "ja", 350 "view-source:view-source:http://xn--qcka1pmc.jp/",
617 default_format_type, net::UnescapeRule::NORMAL, 351 default_format_type, net::UnescapeRule::NORMAL,
618 L"view-source:view-source:http://xn--qcka1pmc.jp/", 12}, 352 L"view-source:view-source:http://xn--qcka1pmc.jp/", 12},
619 353
620 // view-source should omit http and trailing slash where non-view-source 354 // view-source should omit http and trailing slash where non-view-source
621 // would. 355 // would.
622 {"view-source omit http", "view-source:http://a.b/c", "en", 356 {"view-source omit http", "view-source:http://a.b/c",
623 kFormatUrlOmitAll, net::UnescapeRule::NORMAL, L"view-source:a.b/c", 12}, 357 kFormatUrlOmitAll, net::UnescapeRule::NORMAL, L"view-source:a.b/c", 12},
624 {"view-source omit http starts with ftp.", "view-source:http://ftp.b/c", 358 {"view-source omit http starts with ftp.", "view-source:http://ftp.b/c",
625 "en", kFormatUrlOmitAll, net::UnescapeRule::NORMAL, 359 kFormatUrlOmitAll, net::UnescapeRule::NORMAL,
626 L"view-source:http://ftp.b/c", 19}, 360 L"view-source:http://ftp.b/c", 19},
627 {"view-source omit slash when it's the entire path", 361 {"view-source omit slash when it's the entire path",
628 "view-source:http://a.b/", "en", kFormatUrlOmitAll, 362 "view-source:http://a.b/", kFormatUrlOmitAll,
629 net::UnescapeRule::NORMAL, L"view-source:a.b", 12}, 363 net::UnescapeRule::NORMAL, L"view-source:a.b", 12},
630 }; 364 };
631 365
632 for (size_t i = 0; i < arraysize(tests); ++i) { 366 for (size_t i = 0; i < arraysize(tests); ++i) {
633 size_t prefix_len; 367 size_t prefix_len;
634 base::string16 formatted = FormatUrl( 368 base::string16 formatted = FormatUrl(
635 GURL(tests[i].input), tests[i].languages, tests[i].format_types, 369 GURL(tests[i].input), std::string(), tests[i].format_types,
636 tests[i].escape_rules, NULL, &prefix_len, NULL); 370 tests[i].escape_rules, NULL, &prefix_len, NULL);
637 EXPECT_EQ(WideToUTF16(tests[i].output), formatted) << tests[i].description; 371 EXPECT_EQ(WideToUTF16(tests[i].output), formatted) << tests[i].description;
638 EXPECT_EQ(tests[i].prefix_len, prefix_len) << tests[i].description; 372 EXPECT_EQ(tests[i].prefix_len, prefix_len) << tests[i].description;
639 } 373 }
640 } 374 }
641 375
642 TEST(UrlFormatterTest, FormatUrlParsed) { 376 TEST(UrlFormatterTest, FormatUrlParsed) {
643 // No unescape case. 377 // No unescape case.
644 url::Parsed parsed; 378 url::Parsed parsed;
645 base::string16 formatted = 379 base::string16 formatted =
646 FormatUrl(GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/" 380 FormatUrl(GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/"
647 "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"), 381 "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"),
648 "ja", kFormatUrlOmitNothing, net::UnescapeRule::NONE, &parsed, 382 std::string(), kFormatUrlOmitNothing, net::UnescapeRule::NONE,
649 NULL, NULL); 383 &parsed, NULL, NULL);
650 EXPECT_EQ(WideToUTF16( 384 EXPECT_EQ(WideToUTF16(
651 L"http://%E3%82%B0:%E3%83%BC@\x30B0\x30FC\x30B0\x30EB.jp:8080" 385 L"http://%E3%82%B0:%E3%83%BC@\x30B0\x30FC\x30B0\x30EB.jp:8080"
652 L"/%E3%82%B0/?q=%E3%82%B0#\x30B0"), formatted); 386 L"/%E3%82%B0/?q=%E3%82%B0#\x30B0"), formatted);
653 EXPECT_EQ(WideToUTF16(L"%E3%82%B0"), 387 EXPECT_EQ(WideToUTF16(L"%E3%82%B0"),
654 formatted.substr(parsed.username.begin, parsed.username.len)); 388 formatted.substr(parsed.username.begin, parsed.username.len));
655 EXPECT_EQ(WideToUTF16(L"%E3%83%BC"), 389 EXPECT_EQ(WideToUTF16(L"%E3%83%BC"),
656 formatted.substr(parsed.password.begin, parsed.password.len)); 390 formatted.substr(parsed.password.begin, parsed.password.len));
657 EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"), 391 EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"),
658 formatted.substr(parsed.host.begin, parsed.host.len)); 392 formatted.substr(parsed.host.begin, parsed.host.len));
659 EXPECT_EQ(WideToUTF16(L"8080"), 393 EXPECT_EQ(WideToUTF16(L"8080"),
(...skipping 194 matching lines...) Expand 10 before | Expand all | Expand 10 after
854 if (test_char && 588 if (test_char &&
855 strchr(kUnescapedCharacters, static_cast<char>(test_char))) { 589 strchr(kUnescapedCharacters, static_cast<char>(test_char))) {
856 EXPECT_NE(url.spec(), GURL(formatted).spec()); 590 EXPECT_NE(url.spec(), GURL(formatted).spec());
857 } else { 591 } else {
858 EXPECT_EQ(url.spec(), GURL(formatted).spec()); 592 EXPECT_EQ(url.spec(), GURL(formatted).spec());
859 } 593 }
860 } 594 }
861 } 595 }
862 596
863 TEST(UrlFormatterTest, FormatUrlWithOffsets) { 597 TEST(UrlFormatterTest, FormatUrlWithOffsets) {
864 CheckAdjustedOffsets(std::string(), "en", kFormatUrlOmitNothing, 598 CheckAdjustedOffsets(std::string(), kFormatUrlOmitNothing,
865 net::UnescapeRule::NORMAL, NULL); 599 net::UnescapeRule::NORMAL, NULL);
866 600
867 const size_t basic_offsets[] = { 601 const size_t basic_offsets[] = {
868 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 602 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
869 21, 22, 23, 24, 25 603 21, 22, 23, 24, 25
870 }; 604 };
871 CheckAdjustedOffsets("http://www.google.com/foo/", "en", 605 CheckAdjustedOffsets("http://www.google.com/foo/",
872 kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, 606 kFormatUrlOmitNothing, net::UnescapeRule::NORMAL,
873 basic_offsets); 607 basic_offsets);
874 608
875 const size_t omit_auth_offsets_1[] = { 609 const size_t omit_auth_offsets_1[] = {
876 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 7, 610 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 7,
877 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21 611 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
878 }; 612 };
879 CheckAdjustedOffsets("http://foo:bar@www.google.com/", "en", 613 CheckAdjustedOffsets("http://foo:bar@www.google.com/",
880 kFormatUrlOmitUsernamePassword, 614 kFormatUrlOmitUsernamePassword,
881 net::UnescapeRule::NORMAL, omit_auth_offsets_1); 615 net::UnescapeRule::NORMAL, omit_auth_offsets_1);
882 616
883 const size_t omit_auth_offsets_2[] = { 617 const size_t omit_auth_offsets_2[] = {
884 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, 7, 8, 9, 10, 11, 12, 13, 14, 618 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, 7, 8, 9, 10, 11, 12, 13, 14,
885 15, 16, 17, 18, 19, 20, 21 619 15, 16, 17, 18, 19, 20, 21
886 }; 620 };
887 CheckAdjustedOffsets("http://foo@www.google.com/", "en", 621 CheckAdjustedOffsets("http://foo@www.google.com/",
888 kFormatUrlOmitUsernamePassword, 622 kFormatUrlOmitUsernamePassword,
889 net::UnescapeRule::NORMAL, omit_auth_offsets_2); 623 net::UnescapeRule::NORMAL, omit_auth_offsets_2);
890 624
891 const size_t dont_omit_auth_offsets[] = { 625 const size_t dont_omit_auth_offsets[] = {
892 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 626 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
893 kNpos, kNpos, 11, 12, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 627 kNpos, kNpos, 11, 12, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
894 kNpos, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 628 kNpos, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
895 30, 31 629 30, 31
896 }; 630 };
897 // Unescape to "http://foo\x30B0:\x30B0bar@www.google.com". 631 // Unescape to "http://foo\x30B0:\x30B0bar@www.google.com".
898 CheckAdjustedOffsets("http://foo%E3%82%B0:%E3%82%B0bar@www.google.com/", "en", 632 CheckAdjustedOffsets("http://foo%E3%82%B0:%E3%82%B0bar@www.google.com/",
899 kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, 633 kFormatUrlOmitNothing, net::UnescapeRule::NORMAL,
900 dont_omit_auth_offsets); 634 dont_omit_auth_offsets);
901 635
902 const size_t view_source_offsets[] = { 636 const size_t view_source_offsets[] = {
903 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, kNpos, 637 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, kNpos,
904 kNpos, kNpos, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33 638 kNpos, kNpos, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33
905 }; 639 };
906 CheckAdjustedOffsets("view-source:http://foo@www.google.com/", "en", 640 CheckAdjustedOffsets("view-source:http://foo@www.google.com/",
907 kFormatUrlOmitUsernamePassword, 641 kFormatUrlOmitUsernamePassword,
908 net::UnescapeRule::NORMAL, view_source_offsets); 642 net::UnescapeRule::NORMAL, view_source_offsets);
909 643
910 const size_t idn_hostname_offsets_1[] = { 644 const size_t idn_hostname_offsets_1[] = {
911 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 645 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
912 kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 12, 646 kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 12,
913 13, 14, 15, 16, 17, 18, 19 647 13, 14, 15, 16, 17, 18, 19
914 }; 648 };
915 // Convert punycode to "http://\x671d\x65e5\x3042\x3055\x3072.jp/foo/". 649 // Convert punycode to "http://\x671d\x65e5\x3042\x3055\x3072.jp/foo/".
916 CheckAdjustedOffsets("http://xn--l8jvb1ey91xtjb.jp/foo/", "ja", 650 CheckAdjustedOffsets("http://xn--l8jvb1ey91xtjb.jp/foo/",
917 kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, 651 kFormatUrlOmitNothing, net::UnescapeRule::NORMAL,
918 idn_hostname_offsets_1); 652 idn_hostname_offsets_1);
919 653
920 const size_t idn_hostname_offsets_2[] = { 654 const size_t idn_hostname_offsets_2[] = {
921 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, kNpos, kNpos, kNpos, kNpos, kNpos, 655 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, kNpos, kNpos, kNpos, kNpos, kNpos,
922 kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 14, 15, kNpos, kNpos, kNpos, 656 kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 14, 15, kNpos, kNpos, kNpos,
923 kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 657 kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
924 kNpos, 19, 20, 21, 22, 23, 24 658 kNpos, 19, 20, 21, 22, 23, 24
925 }; 659 };
926 // Convert punycode to 660 // Convert punycode to
927 // "http://test.\x89c6\x9891.\x5317\x4eac\x5927\x5b78.test/". 661 // "http://test.\x89c6\x9891.\x5317\x4eac\x5927\x5b78.test/".
928 CheckAdjustedOffsets("http://test.xn--cy2a840a.xn--1lq90ic7f1rc.test/", 662 CheckAdjustedOffsets("http://test.xn--cy2a840a.xn--1lq90ic7f1rc.test/",
929 "zh-CN", kFormatUrlOmitNothing, 663 kFormatUrlOmitNothing,
930 net::UnescapeRule::NORMAL, idn_hostname_offsets_2); 664 net::UnescapeRule::NORMAL, idn_hostname_offsets_2);
931 665
932 const size_t unescape_offsets[] = { 666 const size_t unescape_offsets[] = {
933 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 667 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
934 21, 22, 23, 24, 25, kNpos, kNpos, 26, 27, 28, 29, 30, kNpos, kNpos, kNpos, 668 21, 22, 23, 24, 25, kNpos, kNpos, 26, 27, 28, 29, 30, kNpos, kNpos, kNpos,
935 kNpos, kNpos, kNpos, kNpos, kNpos, 31, kNpos, kNpos, kNpos, kNpos, kNpos, 669 kNpos, kNpos, kNpos, kNpos, kNpos, 31, kNpos, kNpos, kNpos, kNpos, kNpos,
936 kNpos, kNpos, kNpos, 32, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 670 kNpos, kNpos, kNpos, 32, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
937 kNpos, 33, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos 671 kNpos, 33, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos
938 }; 672 };
939 // Unescape to "http://www.google.com/foo bar/\x30B0\x30FC\x30B0\x30EB". 673 // Unescape to "http://www.google.com/foo bar/\x30B0\x30FC\x30B0\x30EB".
940 CheckAdjustedOffsets( 674 CheckAdjustedOffsets(
941 "http://www.google.com/foo%20bar/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", 675 "http://www.google.com/foo%20bar/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB",
942 "en", kFormatUrlOmitNothing, net::UnescapeRule::SPACES, unescape_offsets); 676 kFormatUrlOmitNothing, net::UnescapeRule::SPACES, unescape_offsets);
943 677
944 const size_t ref_offsets[] = { 678 const size_t ref_offsets[] = {
945 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 679 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
946 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, kNpos, kNpos, 32, kNpos, kNpos, 680 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, kNpos, kNpos, 32, kNpos, kNpos,
947 33 681 33
948 }; 682 };
949 // Unescape to "http://www.google.com/foo.html#\x30B0\x30B0z". 683 // Unescape to "http://www.google.com/foo.html#\x30B0\x30B0z".
950 CheckAdjustedOffsets( 684 CheckAdjustedOffsets(
951 "http://www.google.com/foo.html#\xE3\x82\xB0\xE3\x82\xB0z", "en", 685 "http://www.google.com/foo.html#\xE3\x82\xB0\xE3\x82\xB0z",
952 kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, ref_offsets); 686 kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, ref_offsets);
953 687
954 const size_t omit_http_offsets[] = { 688 const size_t omit_http_offsets[] = {
955 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 689 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
956 10, 11, 12, 13, 14 690 10, 11, 12, 13, 14
957 }; 691 };
958 CheckAdjustedOffsets("http://www.google.com/", "en", kFormatUrlOmitHTTP, 692 CheckAdjustedOffsets("http://www.google.com/", kFormatUrlOmitHTTP,
959 net::UnescapeRule::NORMAL, omit_http_offsets); 693 net::UnescapeRule::NORMAL, omit_http_offsets);
960 694
961 const size_t omit_http_start_with_ftp_offsets[] = { 695 const size_t omit_http_start_with_ftp_offsets[] = {
962 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21 696 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
963 }; 697 };
964 CheckAdjustedOffsets("http://ftp.google.com/", "en", kFormatUrlOmitHTTP, 698 CheckAdjustedOffsets("http://ftp.google.com/", kFormatUrlOmitHTTP,
965 net::UnescapeRule::NORMAL, 699 net::UnescapeRule::NORMAL,
966 omit_http_start_with_ftp_offsets); 700 omit_http_start_with_ftp_offsets);
967 701
968 const size_t omit_all_offsets[] = { 702 const size_t omit_all_offsets[] = {
969 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, kNpos, kNpos, kNpos, kNpos, 703 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, kNpos, kNpos, kNpos, kNpos,
970 0, 1, 2, 3, 4, 5, 6, 7 704 0, 1, 2, 3, 4, 5, 6, 7
971 }; 705 };
972 CheckAdjustedOffsets("http://user@foo.com/", "en", kFormatUrlOmitAll, 706 CheckAdjustedOffsets("http://user@foo.com/", kFormatUrlOmitAll,
973 net::UnescapeRule::NORMAL, omit_all_offsets); 707 net::UnescapeRule::NORMAL, omit_all_offsets);
974 } 708 }
975 709
976 } // namespace 710 } // namespace
977 711
978 } // namespace url_formatter 712 } // namespace url_formatter
OLDNEW
« components/url_formatter/url_formatter.cc ('K') | « components/url_formatter/url_formatter.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698