Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(235)

Side by Side Diff: net/base/net_util_icu_unittest.cc

Issue 1258813002: Implement a new IDN display policy (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: add back languages to one more, update comments Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« net/base/net_util_icu.cc ('K') | « net/base/net_util_icu.cc ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2014 The Chromium Authors. All rights reserved. 1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "net/base/net_util.h" 5 #include "net/base/net_util.h"
6 6
7 #include <string.h> 7 #include <string.h>
8 8
9 #include <vector> 9 #include <vector>
10 10
11 #include "base/format_macros.h" 11 #include "base/format_macros.h"
12 #include "base/strings/string_number_conversions.h" 12 #include "base/strings/string_number_conversions.h"
13 #include "base/strings/stringprintf.h" 13 #include "base/strings/stringprintf.h"
14 #include "base/strings/utf_string_conversions.h" 14 #include "base/strings/utf_string_conversions.h"
15 #include "base/time/time.h" 15 #include "base/time/time.h"
16 #include "testing/gtest/include/gtest/gtest.h" 16 #include "testing/gtest/include/gtest/gtest.h"
17 #include "url/gurl.h" 17 #include "url/gurl.h"
18 18
19 using base::ASCIIToUTF16; 19 using base::ASCIIToUTF16;
20 using base::WideToUTF16; 20 using base::WideToUTF16;
21 21
22 namespace net { 22 namespace net {
23 23
24 namespace { 24 namespace {
25 25
26 const size_t kNpos = base::string16::npos; 26 const size_t kNpos = base::string16::npos;
27
28 const char* const kLanguages[] = {
29 "", "en", "zh-CN", "ja", "ko",
30 "he", "ar", "ru", "el", "fr",
31 "de", "pt", "sv", "th", "hi",
32 "de,en", "el,en", "zh-TW,en", "ko,ja", "he,ru,en",
33 "zh,ru,en"
34 };
35
36 struct IDNTestCase { 27 struct IDNTestCase {
37 const char* const input; 28 const char* const input;
38 const wchar_t* unicode_output; 29 const wchar_t* unicode_output;
39 const bool unicode_allowed[arraysize(kLanguages)]; 30 const bool unicode_allowed;
40 }; 31 };
41 32
42 // TODO(jungshik) This is just a random sample of languages and is far 33 // TODO(jungshik) This is just a random sample of languages and is far
43 // from exhaustive. We may have to generate all the combinations 34 // from exhaustive. We may have to generate all the combinations
44 // of languages (powerset of a set of all the languages). 35 // of languages (powerset of a set of all the languages).
45 const IDNTestCase idn_cases[] = { 36 const IDNTestCase idn_cases[] = {
46 // No IDN 37 // No IDN
47 {"www.google.com", L"www.google.com", 38 {"www.google.com", L"www.google.com", true},
48 {true, true, true, true, true, 39 {"www.google.com.", L"www.google.com.", true},
49 true, true, true, true, true, 40 {".", L".", true},
50 true, true, true, true, true, 41 {"", L"", true},
51 true, true, true, true, true, 42 // IDN
52 true}}, 43 // Hanzi (Traditional Chinese)
53 {"www.google.com.", L"www.google.com.", 44 {"xn--1lq90ic7f1rc.cn", L"\x5317\x4eac\x5927\x5b78.cn", true},
54 {true, true, true, true, true, 45 // Hanzi ('video' in Simplified Chinese)
55 true, true, true, true, true, 46 {"xn--cy2a840a.com", L"\x89c6\x9891.com", true},
56 true, true, true, true, true, 47 // Hanzi + '123'
57 true, true, true, true, true, 48 {"www.xn--123-p18d.com",
58 true}}, 49 L"www.\x4e00"
59 {".", L".", 50 L"123.com",
60 {true, true, true, true, true, 51 true},
61 true, true, true, true, true, 52 // Hanzi + Latin : U+56FD is simplified
62 true, true, true, true, true, 53 {"www.xn--hello-9n1hm04c.com", L"www.hello\x4e2d\x56fd.com", true},
63 true, true, true, true, true, 54 // Kanji + Kana (Japanese)
64 true}}, 55 {"xn--l8jvb1ey91xtjb.jp", L"\x671d\x65e5\x3042\x3055\x3072.jp", true},
65 {"", L"", 56 // Katakana including U+30FC
66 {true, true, true, true, true, 57 {"xn--tckm4i2e.jp", L"\x30b3\x30de\x30fc\x30b9.jp", true},
67 true, true, true, true, true, 58 {"xn--3ck7a7g.jp", L"\u30ce\u30f3\u30bd.jp", true},
68 true, true, true, true, true, 59 // Katakana + Latin (Japanese)
69 true, true, true, true, true, 60 {"xn--e-efusa1mzf.jp", L"e\x30b3\x30de\x30fc\x30b9.jp", true},
70 true}}, 61 {"xn--3bkxe.jp", L"\x30c8\x309a.jp", true},
71 // IDN 62 // Hangul (Korean)
72 // Hanzi (Traditional Chinese) 63 {"www.xn--or3b17p6jjc.kr", L"www.\xc804\xc790\xc815\xbd80.kr", true},
73 {"xn--1lq90ic7f1rc.cn", L"\x5317\x4eac\x5927\x5b78.cn", 64 // b<u-umlaut>cher (German)
74 {true, false, true, true, false, 65 {"xn--bcher-kva.de",
75 false, false, false, false, false, 66 L"b\x00fc"
76 false, false, false, false, false, 67 L"cher.de",
77 false, false, true, true, false, 68 true},
78 true}}, 69 // a with diaeresis
79 // Hanzi ('video' in Simplified Chinese : will pass only in zh-CN,zh) 70 {"www.xn--frgbolaget-q5a.se", L"www.f\x00e4rgbolaget.se", true},
80 {"xn--cy2a840a.com", L"\x89c6\x9891.com", 71 // c-cedilla (French)
81 {true, false, true, false, false, 72 {"www.xn--alliancefranaise-npb.fr",
82 false, false, false, false, false, 73 L"www.alliancefran\x00e7"
83 false, false, false, false, false, 74 L"aise.fr",
84 false, false, false, false, false, 75 true},
85 true}}, 76 // caf'e with acute accent' (French)
86 // Hanzi + '123' 77 {"xn--caf-dma.fr", L"caf\x00e9.fr", true},
87 {"www.xn--123-p18d.com", L"www.\x4e00" L"123.com", 78 // c-cedilla and a with tilde (Portuguese)
88 {true, false, true, true, false, 79 {"xn--poema-9qae5a.com.br", L"p\x00e3oema\x00e7\x00e3.com.br", true},
89 false, false, false, false, false, 80 // s with caron
90 false, false, false, false, false, 81 {"xn--achy-f6a.com",
91 false, false, true, true, false, 82 L"\x0161"
92 true}}, 83 L"achy.com",
93 // Hanzi + Latin : U+56FD is simplified and is regarded 84 true},
94 // as not supported in zh-TW. 85 {"xn--kxae4bafwg.gr", L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1.gr",
95 {"www.xn--hello-9n1hm04c.com", L"www.hello\x4e2d\x56fd.com", 86 true},
96 {false, false, true, true, false, 87 // Eutopia + 123 (Greek)
97 false, false, false, false, false, 88 {"xn---123-pldm0haj2bk.gr",
98 false, false, false, false, false, 89 L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1-123.gr", true},
99 false, false, false, true, false, 90 // Cyrillic (Russian)
100 true}}, 91 {"xn--n1aeec9b.ru", L"\x0442\x043e\x0440\x0442\x044b.ru", true},
101 // Kanji + Kana (Japanese) 92 // Cyrillic + 123 (Russian)
102 {"xn--l8jvb1ey91xtjb.jp", L"\x671d\x65e5\x3042\x3055\x3072.jp", 93 {"xn---123-45dmmc5f.ru", L"\x0442\x043e\x0440\x0442\x044b-123.ru", true},
103 {true, false, false, true, false, 94 // Arabic
104 false, false, false, false, false, 95 {"xn--mgba1fmg.eg", L"\x0627\x0641\x0644\x0627\x0645.eg", true},
105 false, false, false, false, false, 96 // Hebrew
106 false, false, false, true, false, 97 {"xn--4dbib.he", L"\x05d5\x05d0\x05d4.he", true},
107 false}}, 98 // Thai
108 // Katakana including U+30FC 99 {"xn--12c2cc4ag3b4ccu.th",
109 {"xn--tckm4i2e.jp", L"\x30b3\x30de\x30fc\x30b9.jp", 100 L"\x0e2a\x0e32\x0e22\x0e01\x0e32\x0e23\x0e1a\x0e34\x0e19.th", true},
110 {true, false, false, true, false, 101 // Devanagari (Hindi)
111 false, false, false, false, false, 102 {"www.xn--l1b6a9e1b7c.in", L"www.\x0905\x0915\x094b\x0932\x093e.in", true},
112 false, false, false, false, false, 103 // Invalid IDN
113 false, false, false, true, false, 104 {"xn--hello?world.com", NULL, false},
114 }}, 105 // Unsafe IDNs
115 {"xn--3ck7a7g.jp", L"\u30ce\u30f3\u30bd.jp", 106 // "payp<alpha>l.com"
116 {true, false, false, true, false, 107 {"www.xn--paypl-g9d.com", L"payp\x03b1l.com", false},
117 false, false, false, false, false, 108 // google.gr with Greek omicron and epsilon
118 false, false, false, false, false, 109 {"xn--ggl-6xc1ca.gr", L"g\x03bf\x03bfgl\x03b5.gr", false},
119 false, false, false, true, false, 110 // google.ru with Cyrillic o
120 }}, 111 {"xn--ggl-tdd6ba.ru", L"g\x043e\x043egl\x0435.ru", false},
121 // Katakana + Latin (Japanese) 112 // h<e with acute>llo<China in Han>.cn
122 // TODO(jungshik): Change 'false' in the first element to 'true' 113 {"xn--hllo-bpa7979ih5m.cn", L"h\x00e9llo\x4e2d\x56fd.cn", false},
123 // after upgrading to ICU 4.2.1 to use new uspoof_* APIs instead 114 // <Greek rho><Cyrillic a><Cyrillic u>.ru
124 // of our IsIDNComponentInSingleScript(). 115 {"xn--2xa6t2b.ru", L"\x03c1\x0430\x0443.ru", false},
125 {"xn--e-efusa1mzf.jp", L"e\x30b3\x30de\x30fc\x30b9.jp", 116 // One that's really long that will force a buffer realloc
126 {false, false, false, true, false, 117 {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
127 false, false, false, false, false, 118 "aaaaaaa",
128 false, false, false, false, false, 119 L"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
129 false, false, false, true, false, 120 L"aaaaaaaa",
130 }}, 121 true},
131 {"xn--3bkxe.jp", L"\x30c8\x309a.jp", 122 {"xn--osd3820f24c.kr", L"\xac00\xb098\x115f.kr", false},
132 {false, false, false, true, false, 123 {"www.xn--google-ho0coa.com", L"www.\x2039google\x203a.com", false},
133 false, false, false, false, false, 124 // Test cases for characters we blacklisted although allowed in IDN.
134 false, false, false, false, false, 125 {"google.xn--comabc-k8d",
135 false, false, false, true, false, 126 L"google.com\x0338"
136 }}, 127 L"abc",
137 // Hangul (Korean) 128 false},
138 {"www.xn--or3b17p6jjc.kr", L"www.\xc804\xc790\xc815\xbd80.kr", 129 {"google.xn--com-oh4ba.evil.jp", L"google.com\x309a\x309a.evil.jp", false},
139 {true, false, false, false, true, 130 {"google.xn--comevil-v04f.jp",
140 false, false, false, false, false, 131 L"google.com\x30ce"
141 false, false, false, false, false, 132 L"evil.jp",
142 false, false, false, true, false, 133 false},
143 false}}, 134 // Padlock icon spoof.
144 // b<u-umlaut>cher (German) 135 {"xn--google-hj64e", L"\U0001f512google.com", false},
145 {"xn--bcher-kva.de", L"b\x00fc" L"cher.de", 136 // Ensure that blacklisting "\xd83d\xdd12" did not inadvertently blacklist
146 {true, false, false, false, false, 137 // all strings with the surrogate '\xdd12'.
147 false, false, false, false, true, 138 {"xn--fk9c.com", L"\U00010912.com", false},
148 true, false, false, false, false, 139 {"xn--g6h.com", L"\x2665.com", true},
149 true, false, false, false, false, 140 {"xn--2ci.com", L"\x272a.com", true},
150 false}},
151 // a with diaeresis
152 {"www.xn--frgbolaget-q5a.se", L"www.f\x00e4rgbolaget.se",
153 {true, false, false, false, false,
154 false, false, false, false, false,
155 true, false, true, false, false,
156 true, false, false, false, false,
157 false}},
158 // c-cedilla (French)
159 {"www.xn--alliancefranaise-npb.fr", L"www.alliancefran\x00e7" L"aise.fr",
160 {true, false, false, false, false,
161 false, false, false, false, true,
162 false, true, false, false, false,
163 false, false, false, false, false,
164 false}},
165 // caf'e with acute accent' (French)
166 {"xn--caf-dma.fr", L"caf\x00e9.fr",
167 {true, false, false, false, false,
168 false, false, false, false, true,
169 false, true, true, false, false,
170 false, false, false, false, false,
171 false}},
172 // c-cedilla and a with tilde (Portuguese)
173 {"xn--poema-9qae5a.com.br", L"p\x00e3oema\x00e7\x00e3.com.br",
174 {true, false, false, false, false,
175 false, false, false, false, false,
176 false, true, false, false, false,
177 false, false, false, false, false,
178 false}},
179 // s with caron
180 {"xn--achy-f6a.com", L"\x0161" L"achy.com",
181 {true, false, false, false, false,
182 false, false, false, false, false,
183 false, false, false, false, false,
184 false, false, false, false, false,
185 false}},
186 // TODO(jungshik) : Add examples with Cyrillic letters
187 // only used in some languages written in Cyrillic.
188 // Eutopia (Greek)
189 {"xn--kxae4bafwg.gr", L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1.gr",
190 {true, false, false, false, false,
191 false, false, false, true, false,
192 false, false, false, false, false,
193 false, true, false, false, false,
194 false}},
195 // Eutopia + 123 (Greek)
196 {"xn---123-pldm0haj2bk.gr",
197 L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1-123.gr",
198 {true, false, false, false, false,
199 false, false, false, true, false,
200 false, false, false, false, false,
201 false, true, false, false, false,
202 false}},
203 // Cyrillic (Russian)
204 {"xn--n1aeec9b.ru", L"\x0442\x043e\x0440\x0442\x044b.ru",
205 {true, false, false, false, false,
206 false, false, true, false, false,
207 false, false, false, false, false,
208 false, false, false, false, true,
209 true}},
210 // Cyrillic + 123 (Russian)
211 {"xn---123-45dmmc5f.ru", L"\x0442\x043e\x0440\x0442\x044b-123.ru",
212 {true, false, false, false, false,
213 false, false, true, false, false,
214 false, false, false, false, false,
215 false, false, false, false, true,
216 true}},
217 // Arabic
218 {"xn--mgba1fmg.ar", L"\x0627\x0641\x0644\x0627\x0645.ar",
219 {true, false, false, false, false,
220 false, true, false, false, false,
221 false, false, false, false, false,
222 false, false, false, false, false,
223 false}},
224 // Hebrew
225 {"xn--4dbib.he", L"\x05d5\x05d0\x05d4.he",
226 {true, false, false, false, false,
227 true, false, false, false, false,
228 false, false, false, false, false,
229 false, false, false, false, true,
230 false}},
231 // Thai
232 {"xn--12c2cc4ag3b4ccu.th",
233 L"\x0e2a\x0e32\x0e22\x0e01\x0e32\x0e23\x0e1a\x0e34\x0e19.th",
234 {true, false, false, false, false,
235 false, false, false, false, false,
236 false, false, false, true, false,
237 false, false, false, false, false,
238 false}},
239 // Devanagari (Hindi)
240 {"www.xn--l1b6a9e1b7c.in", L"www.\x0905\x0915\x094b\x0932\x093e.in",
241 {true, false, false, false, false,
242 false, false, false, false, false,
243 false, false, false, false, true,
244 false, false, false, false, false,
245 false}},
246 // Invalid IDN
247 {"xn--hello?world.com", NULL,
248 {false, false, false, false, false,
249 false, false, false, false, false,
250 false, false, false, false, false,
251 false, false, false, false, false,
252 false}},
253 // Unsafe IDNs
254 // "payp<alpha>l.com"
255 {"www.xn--paypl-g9d.com", L"payp\x03b1l.com",
256 {false, false, false, false, false,
257 false, false, false, false, false,
258 false, false, false, false, false,
259 false, false, false, false, false,
260 false}},
261 // google.gr with Greek omicron and epsilon
262 {"xn--ggl-6xc1ca.gr", L"g\x03bf\x03bfgl\x03b5.gr",
263 {false, false, false, false, false,
264 false, false, false, false, false,
265 false, false, false, false, false,
266 false, false, false, false, false,
267 false}},
268 // google.ru with Cyrillic o
269 {"xn--ggl-tdd6ba.ru", L"g\x043e\x043egl\x0435.ru",
270 {false, false, false, false, false,
271 false, false, false, false, false,
272 false, false, false, false, false,
273 false, false, false, false, false,
274 false}},
275 // h<e with acute>llo<China in Han>.cn
276 {"xn--hllo-bpa7979ih5m.cn", L"h\x00e9llo\x4e2d\x56fd.cn",
277 {false, false, false, false, false,
278 false, false, false, false, false,
279 false, false, false, false, false,
280 false, false, false, false, false,
281 false}},
282 // <Greek rho><Cyrillic a><Cyrillic u>.ru
283 {"xn--2xa6t2b.ru", L"\x03c1\x0430\x0443.ru",
284 {false, false, false, false, false,
285 false, false, false, false, false,
286 false, false, false, false, false,
287 false, false, false, false, false,
288 false}},
289 // One that's really long that will force a buffer realloc
290 {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
291 "aaaaaaa",
292 L"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
293 L"aaaaaaaa",
294 {true, true, true, true, true,
295 true, true, true, true, true,
296 true, true, true, true, true,
297 true, true, true, true, true,
298 true}},
299 // Test cases for characters we blacklisted although allowed in IDN.
300 // Embedded spaces will be turned to %20 in the display.
301 // TODO(jungshik): We need to have more cases. This is a typical
302 // data-driven trap. The following test cases need to be separated
303 // and tested only for a couple of languages.
304 {"xn--osd3820f24c.kr", L"\xac00\xb098\x115f.kr",
305 {false, false, false, false, false,
306 false, false, false, false, false,
307 false, false, false, false, false,
308 false, false, false, false, false,
309 false}},
310 {"www.xn--google-ho0coa.com", L"www.\x2039google\x203a.com",
311 {false, false, false, false, false,
312 false, false, false, false, false,
313 false, false, false, false, false,
314 false, false, false, false, false,
315 }},
316 {"google.xn--comabc-k8d", L"google.com\x0338" L"abc",
317 {false, false, false, false, false,
318 false, false, false, false, false,
319 false, false, false, false, false,
320 false, false, false, false, false,
321 }},
322 {"google.xn--com-oh4ba.evil.jp", L"google.com\x309a\x309a.evil.jp",
323 {false, false, false, false, false,
324 false, false, false, false, false,
325 false, false, false, false, false,
326 false, false, false, false, false,
327 }},
328 {"google.xn--comevil-v04f.jp", L"google.com\x30ce" L"evil.jp",
329 {false, false, false, false, false,
330 false, false, false, false, false,
331 false, false, false, false, false,
332 false, false, false, false, false,
333 }},
334 // Padlock icon spoof.
335 {"xn--google-hj64e", L"\U0001f512google.com",
336 {false, false, false, false, false,
337 false, false, false, false, false,
338 false, false, false, false, false,
339 false, false, false, false, false,
340 }},
341 // Ensure that blacklisting "\xd83d\xdd12" did not inadvertently blacklist
342 // all strings with the surrogate '\xdd12'.
343 {"xn--fk9c.com", L"\U00010912.com",
344 {true, false, false, false, false,
345 false, false, false, false, false,
346 false, false, false, false, false,
347 false, false, false, false, false,
348 }},
349 #if 0 141 #if 0
350 // These two cases are special. We need a separate test. 142 // These two cases are special. We need a separate test.
351 // U+3000 and U+3002 are normalized to ASCII space and dot. 143 // U+3000 and U+3002 are normalized to ASCII space and dot.
352 {"xn-- -kq6ay5z.cn", L"\x4e2d\x56fd\x3000.cn", 144 {"xn-- -kq6ay5z.cn", L"\x4e2d\x56fd\x3000.cn",
353 {false, false, true, false, false, 145 {false, false, true, false, false,
354 false, false, false, false, false, 146 false, false, false, false, false,
355 false, false, false, false, false, 147 false, false, false, false, false,
356 false, false, true, false, false, 148 false, false, true, false, false,
357 true}}, 149 true}},
358 {"xn--fiqs8s.cn", L"\x4e2d\x56fd\x3002" L"cn", 150 {"xn--fiqs8s.cn", L"\x4e2d\x56fd\x3002" L"cn",
359 {false, false, true, false, false, 151 {false, false, true, false, false,
360 false, false, false, false, false, 152 false, false, false, false, false,
361 false, false, false, false, false, 153 false, false, false, false, false,
362 false, false, true, false, false, 154 false, false, true, false, false,
363 true}}, 155 true}},
364 #endif 156 #endif
365 }; 157 };
366 158
367 struct AdjustOffsetCase { 159 struct AdjustOffsetCase {
368 size_t input_offset; 160 size_t input_offset;
369 size_t output_offset; 161 size_t output_offset;
370 }; 162 };
371 163
372 struct UrlTestData { 164 struct UrlTestData {
373 const char* const description; 165 const char* const description;
374 const char* const input; 166 const char* const input;
375 const char* const languages;
376 FormatUrlTypes format_types; 167 FormatUrlTypes format_types;
377 UnescapeRule::Type escape_rules; 168 UnescapeRule::Type escape_rules;
378 const wchar_t* output; // Use |wchar_t| to handle Unicode constants easily. 169 const wchar_t* output; // Use |wchar_t| to handle Unicode constants easily.
379 size_t prefix_len; 170 size_t prefix_len;
380 }; 171 };
381 172
382 // A helper for IDN*{Fast,Slow}.
383 // Append "::<language list>" to |expected| and |actual| to make it
384 // easy to tell which sub-case fails without debugging.
385 void AppendLanguagesToOutputs(const char* languages,
386 base::string16* expected,
387 base::string16* actual) {
388 base::string16 to_append = ASCIIToUTF16("::") + ASCIIToUTF16(languages);
389 expected->append(to_append);
390 actual->append(to_append);
391 }
392
393 // A pair of helpers for the FormatUrlWithOffsets() test. 173 // A pair of helpers for the FormatUrlWithOffsets() test.
394 void VerboseExpect(size_t expected, 174 void VerboseExpect(size_t expected,
395 size_t actual, 175 size_t actual,
396 const std::string& original_url, 176 const std::string& original_url,
397 size_t position, 177 size_t position,
398 const base::string16& formatted_url) { 178 const base::string16& formatted_url) {
399 EXPECT_EQ(expected, actual) << "Original URL: " << original_url 179 EXPECT_EQ(expected, actual) << "Original URL: " << original_url
400 << " (at char " << position << ")\nFormatted URL: " << formatted_url; 180 << " (at char " << position << ")\nFormatted URL: " << formatted_url;
401 } 181 }
402 182
403 void CheckAdjustedOffsets(const std::string& url_string, 183 void CheckAdjustedOffsets(const std::string& url_string,
404 const std::string& languages, 184 const std::string& languages,
405 FormatUrlTypes format_types, 185 FormatUrlTypes format_types,
406 UnescapeRule::Type unescape_rules, 186 UnescapeRule::Type unescape_rules,
407 const size_t* output_offsets) { 187 const size_t* output_offsets) {
408 GURL url(url_string); 188 GURL url(url_string);
409 size_t url_length = url_string.length(); 189 size_t url_length = url_string.length();
410 std::vector<size_t> offsets; 190 std::vector<size_t> offsets;
411 for (size_t i = 0; i <= url_length + 1; ++i) 191 for (size_t i = 0; i <= url_length + 1; ++i)
412 offsets.push_back(i); 192 offsets.push_back(i);
413 offsets.push_back(500000); // Something larger than any input length. 193 offsets.push_back(500000); // Something larger than any input length.
414 offsets.push_back(std::string::npos); 194 offsets.push_back(std::string::npos);
415 base::string16 formatted_url = FormatUrlWithOffsets(url, languages, 195 base::string16 formatted_url = FormatUrlWithOffsets(
416 format_types, unescape_rules, NULL, NULL, &offsets); 196 url, std::string(), format_types, unescape_rules, NULL, NULL, &offsets);
417 for (size_t i = 0; i < url_length; ++i) 197 for (size_t i = 0; i < url_length; ++i)
418 VerboseExpect(output_offsets[i], offsets[i], url_string, i, formatted_url); 198 VerboseExpect(output_offsets[i], offsets[i], url_string, i, formatted_url);
419 VerboseExpect(formatted_url.length(), offsets[url_length], url_string, 199 VerboseExpect(formatted_url.length(), offsets[url_length], url_string,
420 url_length, formatted_url); 200 url_length, formatted_url);
421 VerboseExpect(base::string16::npos, offsets[url_length + 1], url_string, 201 VerboseExpect(base::string16::npos, offsets[url_length + 1], url_string,
422 500000, formatted_url); 202 500000, formatted_url);
423 VerboseExpect(base::string16::npos, offsets[url_length + 2], url_string, 203 VerboseExpect(base::string16::npos, offsets[url_length + 2], url_string,
424 std::string::npos, formatted_url); 204 std::string::npos, formatted_url);
425 } 205 }
426 206
427 } // anonymous namespace 207 } // anonymous namespace
428 208
429 TEST(NetUtilTest, IDNToUnicodeFast) { 209 TEST(NetUtilTest, IDNToUnicode) {
430 for (size_t i = 0; i < arraysize(idn_cases); i++) { 210 for (size_t i = 0; i < arraysize(idn_cases); i++) {
431 for (size_t j = 0; j < arraysize(kLanguages); j++) { 211 base::string16 output(IDNToUnicode(idn_cases[i].input, std::string()));
432 // ja || zh-TW,en || ko,ja -> IDNToUnicodeSlow 212 base::string16 expected(idn_cases[i].unicode_allowed
433 if (j == 3 || j == 17 || j == 18) 213 ? WideToUTF16(idn_cases[i].unicode_output)
434 continue; 214 : ASCIIToUTF16(idn_cases[i].input));
435 base::string16 output(IDNToUnicode(idn_cases[i].input, kLanguages[j])); 215 EXPECT_EQ(expected, output) << "input # " << i << ": \""
436 base::string16 expected(idn_cases[i].unicode_allowed[j] ? 216 << idn_cases[i].input << "\"";
437 WideToUTF16(idn_cases[i].unicode_output) :
438 ASCIIToUTF16(idn_cases[i].input));
439 AppendLanguagesToOutputs(kLanguages[j], &expected, &output);
440 EXPECT_EQ(expected, output) << "input: \"" << idn_cases[i].input
441 << "\", languages: \"" << kLanguages[j]
442 << "\"";
443 }
444 } 217 }
445 } 218 }
446 219
447 TEST(NetUtilTest, IDNToUnicodeSlow) {
448 for (size_t i = 0; i < arraysize(idn_cases); i++) {
449 for (size_t j = 0; j < arraysize(kLanguages); j++) {
450 // !(ja || zh-TW,en || ko,ja) -> IDNToUnicodeFast
451 if (!(j == 3 || j == 17 || j == 18))
452 continue;
453 base::string16 output(IDNToUnicode(idn_cases[i].input, kLanguages[j]));
454 base::string16 expected(idn_cases[i].unicode_allowed[j] ?
455 WideToUTF16(idn_cases[i].unicode_output) :
456 ASCIIToUTF16(idn_cases[i].input));
457 AppendLanguagesToOutputs(kLanguages[j], &expected, &output);
458 EXPECT_EQ(expected, output) << "input: \"" << idn_cases[i].input
459 << "\", languages: \"" << kLanguages[j]
460 << "\"";
461 }
462 }
463 }
464
465 // ulocdata_getExemplarSet may fail with some locales (currently bn, gu, and
466 // te), which was causing a crash (See http://crbug.com/510551). This may be an
467 // icu bug, but regardless, that should not cause a crash.
468 TEST(NetUtilTest, IDNToUnicodeNeverCrashes) {
469 for (char c1 = 'a'; c1 <= 'z'; c1++) {
470 for (char c2 = 'a'; c2 <= 'z'; c2++) {
471 std::string lang = base::StringPrintf("%c%c", c1, c2);
472 base::string16 output(IDNToUnicode("xn--74h", lang));
473 }
474 }
475 }
476
477 TEST(NetUtilTest, StripWWW) { 220 TEST(NetUtilTest, StripWWW) {
478 EXPECT_EQ(base::string16(), StripWWW(base::string16())); 221 EXPECT_EQ(base::string16(), StripWWW(base::string16()));
479 EXPECT_EQ(base::string16(), StripWWW(ASCIIToUTF16("www."))); 222 EXPECT_EQ(base::string16(), StripWWW(ASCIIToUTF16("www.")));
480 EXPECT_EQ(ASCIIToUTF16("blah"), StripWWW(ASCIIToUTF16("www.blah"))); 223 EXPECT_EQ(ASCIIToUTF16("blah"), StripWWW(ASCIIToUTF16("www.blah")));
481 EXPECT_EQ(ASCIIToUTF16("blah"), StripWWW(ASCIIToUTF16("blah"))); 224 EXPECT_EQ(ASCIIToUTF16("blah"), StripWWW(ASCIIToUTF16("blah")));
482 } 225 }
483 226
484 // This is currently a windows specific function. 227 // This is currently a windows specific function.
485 #if defined(OS_WIN) 228 #if defined(OS_WIN)
486 namespace { 229 namespace {
(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after
547 test_cases[i].time); 290 test_cases[i].time);
548 EXPECT_EQ(test_cases[i].expected, results); 291 EXPECT_EQ(test_cases[i].expected, results);
549 } 292 }
550 } 293 }
551 294
552 #endif 295 #endif
553 296
554 TEST(NetUtilTest, FormatUrl) { 297 TEST(NetUtilTest, FormatUrl) {
555 FormatUrlTypes default_format_type = kFormatUrlOmitUsernamePassword; 298 FormatUrlTypes default_format_type = kFormatUrlOmitUsernamePassword;
556 const UrlTestData tests[] = { 299 const UrlTestData tests[] = {
557 {"Empty URL", "", "", default_format_type, UnescapeRule::NORMAL, L"", 0}, 300 {"Empty URL", "", default_format_type, UnescapeRule::NORMAL, L"", 0},
558 301
559 {"Simple URL", 302 {"Simple URL", "http://www.google.com/", default_format_type,
560 "http://www.google.com/", "", default_format_type, UnescapeRule::NORMAL, 303 UnescapeRule::NORMAL, L"http://www.google.com/", 7},
561 L"http://www.google.com/", 7},
562 304
563 {"With a port number and a reference", 305 {"With a port number and a reference",
564 "http://www.google.com:8080/#\xE3\x82\xB0", "", default_format_type, 306 "http://www.google.com:8080/#\xE3\x82\xB0", default_format_type,
565 UnescapeRule::NORMAL, 307 UnescapeRule::NORMAL, L"http://www.google.com:8080/#\x30B0", 7},
566 L"http://www.google.com:8080/#\x30B0", 7},
567 308
568 // -------- IDN tests -------- 309 // -------- IDN tests --------
569 {"Japanese IDN with ja", 310 {"Japanese IDN", "http://xn--l8jvb1ey91xtjb.jp", default_format_type,
570 "http://xn--l8jvb1ey91xtjb.jp", "ja", default_format_type, 311 UnescapeRule::NORMAL, L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7},
571 UnescapeRule::NORMAL, L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7},
572 312
573 {"Japanese IDN with en", 313 {"mailto: with Japanese IDN", "mailto:foo@xn--l8jvb1ey91xtjb.jp",
574 "http://xn--l8jvb1ey91xtjb.jp", "en", default_format_type, 314 default_format_type, UnescapeRule::NORMAL,
575 UnescapeRule::NORMAL, L"http://xn--l8jvb1ey91xtjb.jp/", 7}, 315 // GURL doesn't assume an email address's domain part as a host name.
316 L"mailto:foo@xn--l8jvb1ey91xtjb.jp", 7},
576 317
577 {"Japanese IDN without any languages", 318 {"file: with Japanese IDN", "file://xn--l8jvb1ey91xtjb.jp/config.sys",
578 "http://xn--l8jvb1ey91xtjb.jp", "", default_format_type, 319 default_format_type, UnescapeRule::NORMAL,
579 UnescapeRule::NORMAL, 320 L"file://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 7},
580 // Single script is safe for empty languages.
581 L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7},
582 321
583 {"mailto: with Japanese IDN", 322 {"ftp: with Japanese IDN", "ftp://xn--l8jvb1ey91xtjb.jp/config.sys",
584 "mailto:foo@xn--l8jvb1ey91xtjb.jp", "ja", default_format_type, 323 default_format_type, UnescapeRule::NORMAL,
585 UnescapeRule::NORMAL, 324 L"ftp://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 6},
586 // GURL doesn't assume an email address's domain part as a host name.
587 L"mailto:foo@xn--l8jvb1ey91xtjb.jp", 7},
588 325
589 {"file: with Japanese IDN", 326 // -------- omit_username_password flag tests --------
590 "file://xn--l8jvb1ey91xtjb.jp/config.sys", "ja", default_format_type, 327 {"With username and password, omit_username_password=false",
591 UnescapeRule::NORMAL, 328 "http://user:passwd@example.com/foo", kFormatUrlOmitNothing,
592 L"file://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 7}, 329 UnescapeRule::NORMAL, L"http://user:passwd@example.com/foo", 19},
593 330
594 {"ftp: with Japanese IDN", 331 {"With username and password, omit_username_password=true",
595 "ftp://xn--l8jvb1ey91xtjb.jp/config.sys", "ja", default_format_type, 332 "http://user:passwd@example.com/foo", default_format_type,
596 UnescapeRule::NORMAL, 333 UnescapeRule::NORMAL, L"http://example.com/foo", 7},
597 L"ftp://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 6},
598 334
599 // -------- omit_username_password flag tests -------- 335 {"With username and no password", "http://user@example.com/foo",
600 {"With username and password, omit_username_password=false", 336 default_format_type, UnescapeRule::NORMAL, L"http://example.com/foo", 7},
601 "http://user:passwd@example.com/foo", "",
602 kFormatUrlOmitNothing, UnescapeRule::NORMAL,
603 L"http://user:passwd@example.com/foo", 19},
604 337
605 {"With username and password, omit_username_password=true", 338 {"Just '@' without username and password", "http://@example.com/foo",
606 "http://user:passwd@example.com/foo", "", default_format_type, 339 default_format_type, UnescapeRule::NORMAL, L"http://example.com/foo", 7},
607 UnescapeRule::NORMAL, L"http://example.com/foo", 7},
608 340
609 {"With username and no password", 341 // GURL doesn't think local-part of an email address is username for URL.
610 "http://user@example.com/foo", "", default_format_type, 342 {"mailto:, omit_username_password=true", "mailto:foo@example.com",
611 UnescapeRule::NORMAL, L"http://example.com/foo", 7}, 343 default_format_type, UnescapeRule::NORMAL, L"mailto:foo@example.com", 7},
612 344
613 {"Just '@' without username and password", 345 // -------- unescape flag tests --------
614 "http://@example.com/foo", "", default_format_type, UnescapeRule::NORMAL, 346 {"Do not unescape",
615 L"http://example.com/foo", 7}, 347 "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/"
348 "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"
349 "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB",
350 default_format_type, UnescapeRule::NONE,
351 // GURL parses %-encoded hostnames into Punycode.
352 L"http://\x30B0\x30FC\x30B0\x30EB.jp/"
353 L"%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"
354 L"?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB",
355 7},
616 356
617 // GURL doesn't think local-part of an email address is username for URL. 357 {"Unescape normally",
618 {"mailto:, omit_username_password=true", 358 "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/"
619 "mailto:foo@example.com", "", default_format_type, UnescapeRule::NORMAL, 359 "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"
620 L"mailto:foo@example.com", 7}, 360 "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB",
361 default_format_type, UnescapeRule::NORMAL,
362 L"http://\x30B0\x30FC\x30B0\x30EB.jp/\x30B0\x30FC\x30B0\x30EB"
363 L"?q=\x30B0\x30FC\x30B0\x30EB",
364 7},
621 365
622 // -------- unescape flag tests -------- 366 {"Unescape normally with BiDi control character",
623 {"Do not unescape", 367 "http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy", default_format_type,
624 "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/" 368 UnescapeRule::NORMAL, L"http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy",
625 "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB" 369 7},
626 "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", "en", default_format_type,
627 UnescapeRule::NONE,
628 // GURL parses %-encoded hostnames into Punycode.
629 L"http://xn--qcka1pmc.jp/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"
630 L"?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", 7},
631 370
632 {"Unescape normally", 371 {"Unescape normally including unescape spaces",
633 "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/" 372 "http://www.google.com/search?q=Hello%20World", default_format_type,
634 "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB" 373 UnescapeRule::SPACES, L"http://www.google.com/search?q=Hello World", 7},
635 "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", "en", default_format_type,
636 UnescapeRule::NORMAL,
637 L"http://xn--qcka1pmc.jp/\x30B0\x30FC\x30B0\x30EB"
638 L"?q=\x30B0\x30FC\x30B0\x30EB", 7},
639 374
640 {"Unescape normally with BiDi control character", 375 /*
641 "http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy", "en", default_format_type, 376 {"unescape=true with some special characters",
642 UnescapeRule::NORMAL, L"http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy", 7}, 377 "http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", "",
378 kFormatUrlOmitNothing, UnescapeRule::NORMAL,
379 L"http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", 25},
380 */
381 // Disabled: the resultant URL becomes "...user%253A:%2540passwd...".
643 382
644 {"Unescape normally including unescape spaces", 383 // -------- omit http: --------
645 "http://www.google.com/search?q=Hello%20World", "en", default_format_type, 384 {"omit http with user name", "http://user@example.com/foo",
646 UnescapeRule::SPACES, L"http://www.google.com/search?q=Hello World", 7}, 385 kFormatUrlOmitAll, UnescapeRule::NORMAL, L"example.com/foo", 0},
647 386
648 /* 387 {"omit http", "http://www.google.com/", kFormatUrlOmitHTTP,
649 {"unescape=true with some special characters", 388 UnescapeRule::NORMAL, L"www.google.com/", 0},
650 "http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", "",
651 kFormatUrlOmitNothing, UnescapeRule::NORMAL,
652 L"http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", 25},
653 */
654 // Disabled: the resultant URL becomes "...user%253A:%2540passwd...".
655 389
656 // -------- omit http: -------- 390 {"omit http with https", "https://www.google.com/", kFormatUrlOmitHTTP,
657 {"omit http with user name", 391 UnescapeRule::NORMAL, L"https://www.google.com/", 8},
658 "http://user@example.com/foo", "", kFormatUrlOmitAll,
659 UnescapeRule::NORMAL, L"example.com/foo", 0},
660 392
661 {"omit http", 393 {"omit http starts with ftp.", "http://ftp.google.com/",
662 "http://www.google.com/", "en", kFormatUrlOmitHTTP, 394 kFormatUrlOmitHTTP, UnescapeRule::NORMAL, L"http://ftp.google.com/", 7},
663 UnescapeRule::NORMAL, L"www.google.com/",
664 0},
665 395
666 {"omit http with https", 396 // -------- omit trailing slash on bare hostname --------
667 "https://www.google.com/", "en", kFormatUrlOmitHTTP, 397 {"omit slash when it's the entire path", "http://www.google.com/",
668 UnescapeRule::NORMAL, L"https://www.google.com/", 398 kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL,
669 8}, 399 L"http://www.google.com", 7},
400 {"omit slash when there's a ref", "http://www.google.com/#ref",
401 kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL,
402 L"http://www.google.com/#ref", 7},
403 {"omit slash when there's a query", "http://www.google.com/?",
404 kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL,
405 L"http://www.google.com/?", 7},
406 {"omit slash when it's not the entire path", "http://www.google.com/foo",
407 kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL,
408 L"http://www.google.com/foo", 7},
409 {"omit slash for nonstandard URLs", "data:/",
410 kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL,
411 L"data:/", 5},
412 {"omit slash for file URLs", "file:///",
413 kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL,
414 L"file:///", 7},
670 415
671 {"omit http starts with ftp.", 416 // -------- view-source: --------
672 "http://ftp.google.com/", "en", kFormatUrlOmitHTTP, 417 {"view-source", "view-source:http://xn--qcka1pmc.jp/",
673 UnescapeRule::NORMAL, L"http://ftp.google.com/", 418 default_format_type, UnescapeRule::NORMAL,
674 7}, 419 L"view-source:http://\x30B0\x30FC\x30B0\x30EB.jp/", 19},
675 420
676 // -------- omit trailing slash on bare hostname -------- 421 {"view-source of view-source",
677 {"omit slash when it's the entire path", 422 "view-source:view-source:http://xn--qcka1pmc.jp/", default_format_type,
678 "http://www.google.com/", "en", 423 UnescapeRule::NORMAL, L"view-source:view-source:http://xn--qcka1pmc.jp/",
679 kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL, 424 12},
680 L"http://www.google.com", 7},
681 {"omit slash when there's a ref",
682 "http://www.google.com/#ref", "en",
683 kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL,
684 L"http://www.google.com/#ref", 7},
685 {"omit slash when there's a query",
686 "http://www.google.com/?", "en",
687 kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL,
688 L"http://www.google.com/?", 7},
689 {"omit slash when it's not the entire path",
690 "http://www.google.com/foo", "en",
691 kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL,
692 L"http://www.google.com/foo", 7},
693 {"omit slash for nonstandard URLs",
694 "data:/", "en", kFormatUrlOmitTrailingSlashOnBareHostname,
695 UnescapeRule::NORMAL, L"data:/", 5},
696 {"omit slash for file URLs",
697 "file:///", "en", kFormatUrlOmitTrailingSlashOnBareHostname,
698 UnescapeRule::NORMAL, L"file:///", 7},
699 425
700 // -------- view-source: -------- 426 // view-source should omit http and trailing slash where non-view-source
701 {"view-source", 427 // would.
702 "view-source:http://xn--qcka1pmc.jp/", "ja", default_format_type, 428 {"view-source omit http", "view-source:http://a.b/c", kFormatUrlOmitAll,
703 UnescapeRule::NORMAL, L"view-source:http://\x30B0\x30FC\x30B0\x30EB.jp/", 429 UnescapeRule::NORMAL, L"view-source:a.b/c", 12},
704 19}, 430 {"view-source omit http starts with ftp.", "view-source:http://ftp.b/c",
705 431 kFormatUrlOmitAll, UnescapeRule::NORMAL, L"view-source:http://ftp.b/c",
706 {"view-source of view-source", 432 19},
707 "view-source:view-source:http://xn--qcka1pmc.jp/", "ja", 433 {"view-source omit slash when it's the entire path",
708 default_format_type, UnescapeRule::NORMAL, 434 "view-source:http://a.b/", kFormatUrlOmitAll, UnescapeRule::NORMAL,
709 L"view-source:view-source:http://xn--qcka1pmc.jp/", 12}, 435 L"view-source:a.b", 12},
710
711 // view-source should omit http and trailing slash where non-view-source
712 // would.
713 {"view-source omit http",
714 "view-source:http://a.b/c", "en", kFormatUrlOmitAll,
715 UnescapeRule::NORMAL, L"view-source:a.b/c",
716 12},
717 {"view-source omit http starts with ftp.",
718 "view-source:http://ftp.b/c", "en", kFormatUrlOmitAll,
719 UnescapeRule::NORMAL, L"view-source:http://ftp.b/c",
720 19},
721 {"view-source omit slash when it's the entire path",
722 "view-source:http://a.b/", "en", kFormatUrlOmitAll,
723 UnescapeRule::NORMAL, L"view-source:a.b",
724 12},
725 }; 436 };
726 437
727 for (size_t i = 0; i < arraysize(tests); ++i) { 438 for (size_t i = 0; i < arraysize(tests); ++i) {
728 size_t prefix_len; 439 size_t prefix_len;
729 base::string16 formatted = FormatUrl( 440 base::string16 formatted =
730 GURL(tests[i].input), tests[i].languages, tests[i].format_types, 441 FormatUrl(GURL(tests[i].input), std::string(), tests[i].format_types,
731 tests[i].escape_rules, NULL, &prefix_len, NULL); 442 tests[i].escape_rules, NULL, &prefix_len, NULL);
732 EXPECT_EQ(WideToUTF16(tests[i].output), formatted) << tests[i].description; 443 EXPECT_EQ(WideToUTF16(tests[i].output), formatted) << tests[i].description;
733 EXPECT_EQ(tests[i].prefix_len, prefix_len) << tests[i].description; 444 EXPECT_EQ(tests[i].prefix_len, prefix_len) << tests[i].description;
734 } 445 }
735 } 446 }
736 447
737 TEST(NetUtilTest, FormatUrlParsed) { 448 TEST(NetUtilTest, FormatUrlParsed) {
738 // No unescape case. 449 // No unescape case.
739 url::Parsed parsed; 450 url::Parsed parsed;
740 base::string16 formatted = FormatUrl( 451 base::string16 formatted =
741 GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/" 452 FormatUrl(GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/"
742 "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"), 453 "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"),
743 "ja", kFormatUrlOmitNothing, UnescapeRule::NONE, &parsed, NULL, 454 std::string(), kFormatUrlOmitNothing, UnescapeRule::NONE,
744 NULL); 455 &parsed, NULL, NULL);
745 EXPECT_EQ(WideToUTF16( 456 EXPECT_EQ(WideToUTF16(
746 L"http://%E3%82%B0:%E3%83%BC@\x30B0\x30FC\x30B0\x30EB.jp:8080" 457 L"http://%E3%82%B0:%E3%83%BC@\x30B0\x30FC\x30B0\x30EB.jp:8080"
747 L"/%E3%82%B0/?q=%E3%82%B0#\x30B0"), formatted); 458 L"/%E3%82%B0/?q=%E3%82%B0#\x30B0"), formatted);
748 EXPECT_EQ(WideToUTF16(L"%E3%82%B0"), 459 EXPECT_EQ(WideToUTF16(L"%E3%82%B0"),
749 formatted.substr(parsed.username.begin, parsed.username.len)); 460 formatted.substr(parsed.username.begin, parsed.username.len));
750 EXPECT_EQ(WideToUTF16(L"%E3%83%BC"), 461 EXPECT_EQ(WideToUTF16(L"%E3%83%BC"),
751 formatted.substr(parsed.password.begin, parsed.password.len)); 462 formatted.substr(parsed.password.begin, parsed.password.len));
752 EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"), 463 EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"),
753 formatted.substr(parsed.host.begin, parsed.host.len)); 464 formatted.substr(parsed.host.begin, parsed.host.len));
754 EXPECT_EQ(WideToUTF16(L"8080"), 465 EXPECT_EQ(WideToUTF16(L"8080"),
755 formatted.substr(parsed.port.begin, parsed.port.len)); 466 formatted.substr(parsed.port.begin, parsed.port.len));
756 EXPECT_EQ(WideToUTF16(L"/%E3%82%B0/"), 467 EXPECT_EQ(WideToUTF16(L"/%E3%82%B0/"),
757 formatted.substr(parsed.path.begin, parsed.path.len)); 468 formatted.substr(parsed.path.begin, parsed.path.len));
758 EXPECT_EQ(WideToUTF16(L"q=%E3%82%B0"), 469 EXPECT_EQ(WideToUTF16(L"q=%E3%82%B0"),
759 formatted.substr(parsed.query.begin, parsed.query.len)); 470 formatted.substr(parsed.query.begin, parsed.query.len));
760 EXPECT_EQ(WideToUTF16(L"\x30B0"), 471 EXPECT_EQ(WideToUTF16(L"\x30B0"),
761 formatted.substr(parsed.ref.begin, parsed.ref.len)); 472 formatted.substr(parsed.ref.begin, parsed.ref.len));
762 473
763 // Unescape case. 474 // Unescape case.
764 formatted = FormatUrl( 475 formatted =
765 GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/" 476 FormatUrl(GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/"
766 "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"), 477 "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"),
767 "ja", kFormatUrlOmitNothing, UnescapeRule::NORMAL, &parsed, NULL, 478 std::string(), kFormatUrlOmitNothing, UnescapeRule::NORMAL,
768 NULL); 479 &parsed, NULL, NULL);
769 EXPECT_EQ(WideToUTF16(L"http://\x30B0:\x30FC@\x30B0\x30FC\x30B0\x30EB.jp:8080" 480 EXPECT_EQ(WideToUTF16(L"http://\x30B0:\x30FC@\x30B0\x30FC\x30B0\x30EB.jp:8080"
770 L"/\x30B0/?q=\x30B0#\x30B0"), formatted); 481 L"/\x30B0/?q=\x30B0#\x30B0"), formatted);
771 EXPECT_EQ(WideToUTF16(L"\x30B0"), 482 EXPECT_EQ(WideToUTF16(L"\x30B0"),
772 formatted.substr(parsed.username.begin, parsed.username.len)); 483 formatted.substr(parsed.username.begin, parsed.username.len));
773 EXPECT_EQ(WideToUTF16(L"\x30FC"), 484 EXPECT_EQ(WideToUTF16(L"\x30FC"),
774 formatted.substr(parsed.password.begin, parsed.password.len)); 485 formatted.substr(parsed.password.begin, parsed.password.len));
775 EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"), 486 EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"),
776 formatted.substr(parsed.host.begin, parsed.host.len)); 487 formatted.substr(parsed.host.begin, parsed.host.len));
777 EXPECT_EQ(WideToUTF16(L"8080"), 488 EXPECT_EQ(WideToUTF16(L"8080"),
778 formatted.substr(parsed.port.begin, parsed.port.len)); 489 formatted.substr(parsed.port.begin, parsed.port.len));
779 EXPECT_EQ(WideToUTF16(L"/\x30B0/"), 490 EXPECT_EQ(WideToUTF16(L"/\x30B0/"),
780 formatted.substr(parsed.path.begin, parsed.path.len)); 491 formatted.substr(parsed.path.begin, parsed.path.len));
781 EXPECT_EQ(WideToUTF16(L"q=\x30B0"), 492 EXPECT_EQ(WideToUTF16(L"q=\x30B0"),
782 formatted.substr(parsed.query.begin, parsed.query.len)); 493 formatted.substr(parsed.query.begin, parsed.query.len));
783 EXPECT_EQ(WideToUTF16(L"\x30B0"), 494 EXPECT_EQ(WideToUTF16(L"\x30B0"),
784 formatted.substr(parsed.ref.begin, parsed.ref.len)); 495 formatted.substr(parsed.ref.begin, parsed.ref.len));
785 496
786 // Omit_username_password + unescape case. 497 // Omit_username_password + unescape case.
787 formatted = FormatUrl( 498 formatted =
788 GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/" 499 FormatUrl(GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/"
789 "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"), 500 "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"),
790 "ja", kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, &parsed, 501 std::string(), kFormatUrlOmitUsernamePassword,
791 NULL, NULL); 502 UnescapeRule::NORMAL, &parsed, NULL, NULL);
792 EXPECT_EQ(WideToUTF16(L"http://\x30B0\x30FC\x30B0\x30EB.jp:8080" 503 EXPECT_EQ(WideToUTF16(L"http://\x30B0\x30FC\x30B0\x30EB.jp:8080"
793 L"/\x30B0/?q=\x30B0#\x30B0"), formatted); 504 L"/\x30B0/?q=\x30B0#\x30B0"), formatted);
794 EXPECT_FALSE(parsed.username.is_valid()); 505 EXPECT_FALSE(parsed.username.is_valid());
795 EXPECT_FALSE(parsed.password.is_valid()); 506 EXPECT_FALSE(parsed.password.is_valid());
796 EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"), 507 EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"),
797 formatted.substr(parsed.host.begin, parsed.host.len)); 508 formatted.substr(parsed.host.begin, parsed.host.len));
798 EXPECT_EQ(WideToUTF16(L"8080"), 509 EXPECT_EQ(WideToUTF16(L"8080"),
799 formatted.substr(parsed.port.begin, parsed.port.len)); 510 formatted.substr(parsed.port.begin, parsed.port.len));
800 EXPECT_EQ(WideToUTF16(L"/\x30B0/"), 511 EXPECT_EQ(WideToUTF16(L"/\x30B0/"),
801 formatted.substr(parsed.path.begin, parsed.path.len)); 512 formatted.substr(parsed.path.begin, parsed.path.len));
(...skipping 292 matching lines...) Expand 10 before | Expand all | Expand 10 after
1094 805
1095 const size_t omit_all_offsets[] = { 806 const size_t omit_all_offsets[] = {
1096 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, kNpos, kNpos, kNpos, kNpos, 807 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, kNpos, kNpos, kNpos, kNpos,
1097 0, 1, 2, 3, 4, 5, 6, 7 808 0, 1, 2, 3, 4, 5, 6, 7
1098 }; 809 };
1099 CheckAdjustedOffsets("http://user@foo.com/", "en", kFormatUrlOmitAll, 810 CheckAdjustedOffsets("http://user@foo.com/", "en", kFormatUrlOmitAll,
1100 UnescapeRule::NORMAL, omit_all_offsets); 811 UnescapeRule::NORMAL, omit_all_offsets);
1101 } 812 }
1102 813
1103 } // namespace net 814 } // namespace net
OLDNEW
« net/base/net_util_icu.cc ('K') | « net/base/net_util_icu.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698