Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(494)

Side by Side Diff: components/url_formatter/url_formatter_unittest.cc

Issue 1258813002: Implement a new IDN display policy (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: more comment update per Peter Created 4 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « components/url_formatter/url_formatter.cc ('k') | url/url_canon_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2015 The Chromium Authors. All rights reserved. 1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/url_formatter/url_formatter.h" 5 #include "components/url_formatter/url_formatter.h"
6 6
7 #include <stddef.h> 7 #include <stddef.h>
8 #include <string.h> 8 #include <string.h>
9 9
10 #include <vector> 10 #include <vector>
11 11
12 #include "base/macros.h" 12 #include "base/macros.h"
13 #include "base/strings/string_number_conversions.h" 13 #include "base/strings/string_number_conversions.h"
14 #include "base/strings/stringprintf.h" 14 #include "base/strings/stringprintf.h"
15 #include "base/strings/utf_string_conversions.h" 15 #include "base/strings/utf_string_conversions.h"
16 #include "testing/gtest/include/gtest/gtest.h" 16 #include "testing/gtest/include/gtest/gtest.h"
17 #include "url/gurl.h" 17 #include "url/gurl.h"
18 18
19 19
20 namespace url_formatter { 20 namespace url_formatter {
21 21
22 namespace { 22 namespace {
23 23
24 using base::WideToUTF16; 24 using base::WideToUTF16;
25 using base::ASCIIToUTF16; 25 using base::ASCIIToUTF16;
26 26
27 const size_t kNpos = base::string16::npos; 27 const size_t kNpos = base::string16::npos;
28 28
29 const char* const kLanguages[] = {
30 "", "en", "zh-CN", "ja", "ko",
31 "he", "ar", "ru", "el", "fr",
32 "de", "pt", "sv", "th", "hi",
33 "de,en", "el,en", "zh-TW,en", "ko,ja", "he,ru,en",
34 "zh,ru,en"
35 };
36
37 struct IDNTestCase { 29 struct IDNTestCase {
38 const char* const input; 30 const char* const input;
39 const wchar_t* unicode_output; 31 const wchar_t* unicode_output;
40 const bool unicode_allowed[arraysize(kLanguages)]; 32 const bool unicode_allowed;
41 }; 33 };
42 34
43 // TODO(jungshik) This is just a random sample of languages and is far
44 // from exhaustive. We may have to generate all the combinations
45 // of languages (powerset of a set of all the languages).
46 const IDNTestCase idn_cases[] = { 35 const IDNTestCase idn_cases[] = {
47 // No IDN 36 // No IDN
48 {"www.google.com", L"www.google.com", 37 {"www.google.com", L"www.google.com", true},
49 {true, true, true, true, true, 38 {"www.google.com.", L"www.google.com.", true},
50 true, true, true, true, true, 39 {".", L".", true},
51 true, true, true, true, true, 40 {"", L"", true},
52 true, true, true, true, true,
53 true}},
54 {"www.google.com.", L"www.google.com.",
55 {true, true, true, true, true,
56 true, true, true, true, true,
57 true, true, true, true, true,
58 true, true, true, true, true,
59 true}},
60 {".", L".",
61 {true, true, true, true, true,
62 true, true, true, true, true,
63 true, true, true, true, true,
64 true, true, true, true, true,
65 true}},
66 {"", L"",
67 {true, true, true, true, true,
68 true, true, true, true, true,
69 true, true, true, true, true,
70 true, true, true, true, true,
71 true}},
72 // IDN 41 // IDN
73 // Hanzi (Traditional Chinese) 42 // Hanzi (Traditional Chinese)
74 {"xn--1lq90ic7f1rc.cn", L"\x5317\x4eac\x5927\x5b78.cn", 43 {"xn--1lq90ic7f1rc.cn", L"\x5317\x4eac\x5927\x5b78.cn", true},
75 {true, false, true, true, false, 44 // Hanzi ('video' in Simplified Chinese)
76 false, false, false, false, false, 45 {"xn--cy2a840a.com", L"\x89c6\x9891.com", true},
77 false, false, false, false, false,
78 false, false, true, true, false,
79 true}},
80 // Hanzi ('video' in Simplified Chinese : will pass only in zh-CN,zh)
81 {"xn--cy2a840a.com", L"\x89c6\x9891.com",
82 {true, false, true, false, false,
83 false, false, false, false, false,
84 false, false, false, false, false,
85 false, false, false, false, false,
86 true}},
87 // Hanzi + '123' 46 // Hanzi + '123'
88 {"www.xn--123-p18d.com", L"www.\x4e00" L"123.com", 47 {"www.xn--123-p18d.com", L"www.\x4e00" L"123.com", true},
89 {true, false, true, true, false, 48 // Hanzi + Latin : U+56FD is simplified
90 false, false, false, false, false, 49 {"www.xn--hello-9n1hm04c.com", L"www.hello\x4e2d\x56fd.com", true},
91 false, false, false, false, false,
92 false, false, true, true, false,
93 true}},
94 // Hanzi + Latin : U+56FD is simplified and is regarded
95 // as not supported in zh-TW.
96 {"www.xn--hello-9n1hm04c.com", L"www.hello\x4e2d\x56fd.com",
97 {false, false, true, true, false,
98 false, false, false, false, false,
99 false, false, false, false, false,
100 false, false, false, true, false,
101 true}},
102 // Kanji + Kana (Japanese) 50 // Kanji + Kana (Japanese)
103 {"xn--l8jvb1ey91xtjb.jp", L"\x671d\x65e5\x3042\x3055\x3072.jp", 51 {"xn--l8jvb1ey91xtjb.jp", L"\x671d\x65e5\x3042\x3055\x3072.jp", true},
104 {true, false, false, true, false,
105 false, false, false, false, false,
106 false, false, false, false, false,
107 false, false, false, true, false,
108 false}},
109 // Katakana including U+30FC 52 // Katakana including U+30FC
110 {"xn--tckm4i2e.jp", L"\x30b3\x30de\x30fc\x30b9.jp", 53 {"xn--tckm4i2e.jp", L"\x30b3\x30de\x30fc\x30b9.jp", true},
111 {true, false, false, true, false, 54 {"xn--3ck7a7g.jp", L"\u30ce\u30f3\u30bd.jp", true},
112 false, false, false, false, false,
113 false, false, false, false, false,
114 false, false, false, true, false,
115 }},
116 {"xn--3ck7a7g.jp", L"\u30ce\u30f3\u30bd.jp",
117 {true, false, false, true, false,
118 false, false, false, false, false,
119 false, false, false, false, false,
120 false, false, false, true, false,
121 }},
122 // Katakana + Latin (Japanese) 55 // Katakana + Latin (Japanese)
123 // TODO(jungshik): Change 'false' in the first element to 'true' 56 {"xn--e-efusa1mzf.jp", L"e\x30b3\x30de\x30fc\x30b9.jp", true},
124 // after upgrading to ICU 4.2.1 to use new uspoof_* APIs instead 57 {"xn--3bkxe.jp", L"\x30c8\x309a.jp", true},
125 // of our IsIDNComponentInSingleScript().
126 {"xn--e-efusa1mzf.jp", L"e\x30b3\x30de\x30fc\x30b9.jp",
127 {false, false, false, true, false,
128 false, false, false, false, false,
129 false, false, false, false, false,
130 false, false, false, true, false,
131 }},
132 {"xn--3bkxe.jp", L"\x30c8\x309a.jp",
133 {false, false, false, true, false,
134 false, false, false, false, false,
135 false, false, false, false, false,
136 false, false, false, true, false,
137 }},
138 // Hangul (Korean) 58 // Hangul (Korean)
139 {"www.xn--or3b17p6jjc.kr", L"www.\xc804\xc790\xc815\xbd80.kr", 59 {"www.xn--or3b17p6jjc.kr", L"www.\xc804\xc790\xc815\xbd80.kr", true},
140 {true, false, false, false, true,
141 false, false, false, false, false,
142 false, false, false, false, false,
143 false, false, false, true, false,
144 false}},
145 // b<u-umlaut>cher (German) 60 // b<u-umlaut>cher (German)
146 {"xn--bcher-kva.de", L"b\x00fc" L"cher.de", 61 {"xn--bcher-kva.de", L"b\x00fc" L"cher.de", true},
147 {true, false, false, false, false,
148 false, false, false, false, true,
149 true, false, false, false, false,
150 true, false, false, false, false,
151 false}},
152 // a with diaeresis 62 // a with diaeresis
153 {"www.xn--frgbolaget-q5a.se", L"www.f\x00e4rgbolaget.se", 63 {"www.xn--frgbolaget-q5a.se", L"www.f\x00e4rgbolaget.se", true},
154 {true, false, false, false, false,
155 false, false, false, false, false,
156 true, false, true, false, false,
157 true, false, false, false, false,
158 false}},
159 // c-cedilla (French) 64 // c-cedilla (French)
160 {"www.xn--alliancefranaise-npb.fr", L"www.alliancefran\x00e7" L"aise.fr", 65 {"www.xn--alliancefranaise-npb.fr",
161 {true, false, false, false, false, 66 L"www.alliancefran\x00e7" L"aise.fr", true},
162 false, false, false, false, true,
163 false, true, false, false, false,
164 false, false, false, false, false,
165 false}},
166 // caf'e with acute accent' (French) 67 // caf'e with acute accent' (French)
167 {"xn--caf-dma.fr", L"caf\x00e9.fr", 68 {"xn--caf-dma.fr", L"caf\x00e9.fr", true},
168 {true, false, false, false, false,
169 false, false, false, false, true,
170 false, true, true, false, false,
171 false, false, false, false, false,
172 false}},
173 // c-cedilla and a with tilde (Portuguese) 69 // c-cedilla and a with tilde (Portuguese)
174 {"xn--poema-9qae5a.com.br", L"p\x00e3oema\x00e7\x00e3.com.br", 70 {"xn--poema-9qae5a.com.br", L"p\x00e3oema\x00e7\x00e3.com.br", true},
175 {true, false, false, false, false,
176 false, false, false, false, false,
177 false, true, false, false, false,
178 false, false, false, false, false,
179 false}},
180 // s with caron 71 // s with caron
181 {"xn--achy-f6a.com", L"\x0161" L"achy.com", 72 {"xn--achy-f6a.com", L"\x0161" L"achy.com", true},
182 {true, false, false, false, false,
183 false, false, false, false, false,
184 false, false, false, false, false,
185 false, false, false, false, false,
186 false}},
187 // TODO(jungshik) : Add examples with Cyrillic letters
188 // only used in some languages written in Cyrillic.
189 // Eutopia (Greek)
190 {"xn--kxae4bafwg.gr", L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1.gr", 73 {"xn--kxae4bafwg.gr", L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1.gr",
191 {true, false, false, false, false, 74 true},
192 false, false, false, true, false,
193 false, false, false, false, false,
194 false, true, false, false, false,
195 false}},
196 // Eutopia + 123 (Greek) 75 // Eutopia + 123 (Greek)
197 {"xn---123-pldm0haj2bk.gr", 76 {"xn---123-pldm0haj2bk.gr",
198 L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1-123.gr", 77 L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1-123.gr", true},
199 {true, false, false, false, false,
200 false, false, false, true, false,
201 false, false, false, false, false,
202 false, true, false, false, false,
203 false}},
204 // Cyrillic (Russian) 78 // Cyrillic (Russian)
205 {"xn--n1aeec9b.ru", L"\x0442\x043e\x0440\x0442\x044b.ru", 79 {"xn--n1aeec9b.ru", L"\x0442\x043e\x0440\x0442\x044b.ru", true},
206 {true, false, false, false, false,
207 false, false, true, false, false,
208 false, false, false, false, false,
209 false, false, false, false, true,
210 true}},
211 // Cyrillic + 123 (Russian) 80 // Cyrillic + 123 (Russian)
212 {"xn---123-45dmmc5f.ru", L"\x0442\x043e\x0440\x0442\x044b-123.ru", 81 {"xn---123-45dmmc5f.ru", L"\x0442\x043e\x0440\x0442\x044b-123.ru", true},
213 {true, false, false, false, false, 82 // 'president' in Russian. Is a wholescript confusable, but allowed.
214 false, false, true, false, false, 83 {"xn--d1abbgf6aiiy.xn--p1ai",
215 false, false, false, false, false, 84 L"\x043f\x0440\x0435\x0437\x0438\x0434\x0435\x043d\x0442.\x0440\x0444",
216 false, false, false, false, true, 85 true},
217 true}},
218 // Arabic 86 // Arabic
219 {"xn--mgba1fmg.ar", L"\x0627\x0641\x0644\x0627\x0645.ar", 87 {"xn--mgba1fmg.eg", L"\x0627\x0641\x0644\x0627\x0645.eg", true},
220 {true, false, false, false, false,
221 false, true, false, false, false,
222 false, false, false, false, false,
223 false, false, false, false, false,
224 false}},
225 // Hebrew 88 // Hebrew
226 {"xn--4dbib.he", L"\x05d5\x05d0\x05d4.he", 89 {"xn--4dbib.he", L"\x05d5\x05d0\x05d4.he", true},
227 {true, false, false, false, false, 90 // Hebrew + Common
228 true, false, false, false, false, 91 {"xn---123-ptf2c5c6bt.il", L"\x05e2\x05d1\x05e8\x05d9\x05ea-123.il", true},
229 false, false, false, false, false,
230 false, false, false, false, true,
231 false}},
232 // Thai 92 // Thai
233 {"xn--12c2cc4ag3b4ccu.th", 93 {"xn--12c2cc4ag3b4ccu.th",
234 L"\x0e2a\x0e32\x0e22\x0e01\x0e32\x0e23\x0e1a\x0e34\x0e19.th", 94 L"\x0e2a\x0e32\x0e22\x0e01\x0e32\x0e23\x0e1a\x0e34\x0e19.th", true},
235 {true, false, false, false, false, 95 // Thai + Common
236 false, false, false, false, false, 96 {"xn---123-9goxcp8c9db2r.th",
237 false, false, false, true, false, 97 L"\x0e20\x0e32\x0e29\x0e32\x0e44\x0e17\x0e22-123.th", true},
238 false, false, false, false, false,
239 false}},
240 // Devanagari (Hindi) 98 // Devanagari (Hindi)
241 {"www.xn--l1b6a9e1b7c.in", L"www.\x0905\x0915\x094b\x0932\x093e.in", 99 {"www.xn--l1b6a9e1b7c.in", L"www.\x0905\x0915\x094b\x0932\x093e.in", true},
242 {true, false, false, false, false, 100 // Devanagari + Common
243 false, false, false, false, false, 101 {"xn---123-kbjl2j0bl2k.in",
244 false, false, false, false, true, 102 L"\x0939\x093f\x0928\x094d\x0926\x0940-123.in", true},
245 false, false, false, false, false, 103
246 false}}, 104 // 5 Aspirational scripts
247 // Invalid IDN 105 // Unified Canadian Syllabary
248 {"xn--hello?world.com", NULL, 106 {"xn--dfe0tte.ca", L"\x1456\x14c2\x14ef.ca", true},
249 {false, false, false, false, false, 107 // Tifinagh
250 false, false, false, false, false, 108 {"xn--4ljxa2bb4a6bxb.ma",
251 false, false, false, false, false, 109 L"\x2d5c\x2d49\x2d3c\x2d49\x2d4f\x2d30\x2d56.ma", true},
252 false, false, false, false, false, 110 // Tifinagh with a disallowed character(U+2D6F)
253 false}}, 111 {"xn--hmjzaby5d5f.ma", L"\x2d5c\x2d49\x2d3c\x2d6f\x2d49\x2d4f.ma", false},
254 // Unsafe IDNs 112 // Yi
113 {"xn--4o7a6e1x64c.cn", L"\xa188\xa320\xa071\xa0b7.cn", true},
114 // Mongolian - 'ordu' (place, camp)
115 {"xn--56ec8bp.cn", L"\x1823\x1837\x1833\x1824.cn", true},
116 // Mongolian with a disallowed character
117 {"xn--95e5de3ds.cn", L"\x1823\x1837\x1804\x1833\x1824.cn", false},
118 // Miao/Pollard
119 {"xn--2u0fpf0a.cn", L"\U00016f04\U00016f62\U00016f59.cn", true},
120
121 // Script mixing tests
122 // The following script combinations are allowed.
123 // MODERATELY_RESTRICTIVE with Latin limited to ASCII-Latin.
124 // ASCII-Latin + Japn (Kana + Han)
125 // ASCII-Latin + Kore (Hangul + Han)
126 // ASCII-Latin + Han + Bopomofo
127 // ASCII-Latin + any allowed script other than Cyrillic, Greek and Cherokee
255 // "payp<alpha>l.com" 128 // "payp<alpha>l.com"
256 {"www.xn--paypl-g9d.com", L"payp\x03b1l.com", 129 {"www.xn--paypl-g9d.com", L"payp\x03b1l.com", false},
257 {false, false, false, false, false,
258 false, false, false, false, false,
259 false, false, false, false, false,
260 false, false, false, false, false,
261 false}},
262 // google.gr with Greek omicron and epsilon 130 // google.gr with Greek omicron and epsilon
263 {"xn--ggl-6xc1ca.gr", L"g\x03bf\x03bfgl\x03b5.gr", 131 {"xn--ggl-6xc1ca.gr", L"g\x03bf\x03bfgl\x03b5.gr", false},
264 {false, false, false, false, false,
265 false, false, false, false, false,
266 false, false, false, false, false,
267 false, false, false, false, false,
268 false}},
269 // google.ru with Cyrillic o 132 // google.ru with Cyrillic o
270 {"xn--ggl-tdd6ba.ru", L"g\x043e\x043egl\x0435.ru", 133 {"xn--ggl-tdd6ba.ru", L"g\x043e\x043egl\x0435.ru", false},
271 {false, false, false, false, false,
272 false, false, false, false, false,
273 false, false, false, false, false,
274 false, false, false, false, false,
275 false}},
276 // h<e with acute>llo<China in Han>.cn 134 // h<e with acute>llo<China in Han>.cn
277 {"xn--hllo-bpa7979ih5m.cn", L"h\x00e9llo\x4e2d\x56fd.cn", 135 {"xn--hllo-bpa7979ih5m.cn", L"h\x00e9llo\x4e2d\x56fd.cn", false},
278 {false, false, false, false, false,
279 false, false, false, false, false,
280 false, false, false, false, false,
281 false, false, false, false, false,
282 false}},
283 // <Greek rho><Cyrillic a><Cyrillic u>.ru 136 // <Greek rho><Cyrillic a><Cyrillic u>.ru
284 {"xn--2xa6t2b.ru", L"\x03c1\x0430\x0443.ru", 137 {"xn--2xa6t2b.ru", L"\x03c1\x0430\x0443.ru", false},
285 {false, false, false, false, false, 138 // Hangul + Latin
286 false, false, false, false, false, 139 {"xn--han-eb9ll88m.kr", L"\xd55c\xae00han.kr", true},
287 false, false, false, false, false, 140 // Hangul + Latin + Han with IDN ccTLD
288 false, false, false, false, false, 141 {"xn--han-or0kq92gkm3c.xn--3e0b707e",
289 false}}, 142 L"\xd55c\xae00han\x97d3.\xd55c\xad6d", true},
143 // non-ASCII Latin + Hangul
144 {"xn--caf-dma9024xvpg.kr", L"caf\x00e9\xce74\xd398.kr", false},
145 // Hangul + Hiragana
146 {"xn--y9j3b9855e.kr", L"\xd55c\x3072\x3089.kr", false},
147 // <Hiragana>.<Hangul> is allowed because script mixing check is per label.
148 {"xn--y9j3b.xn--3e0b707e", L"\x3072\x3089.\xd55c\xad6d", true},
149 // Traditional Han + Latin
150 {"xn--hanzi-u57ii69i.tw", L"\x6f22\x5b57hanzi.tw", true},
151 // Simplified Han + Latin
152 {"xn--hanzi-u57i952h.cn", L"\x6c49\x5b57hanzi.cn", true},
153 // Simplified Han + Traditional Han
154 {"xn--hanzi-if9kt8n.cn", L"\x6c49\x6f22hanzi.cn", true},
155 // Han + Hiragana + Katakana + Latin
156 {"xn--kanji-ii4dpizfq59yuykqr4b.jp",
157 L"\x632f\x308a\x4eee\x540d\x30ab\x30bfkanji.jp", true},
158 // Han + Bopomofo
159 {"xn--5ekcde0577e87tc.tw", L"\x6ce8\x97f3\x3105\x3106\x3107\x3108.tw", true},
160 // Han + Latin + Bopomofo
161 {"xn--bopo-ty4cghi8509kk7xd.tw",
162 L"\x6ce8\x97f3" L"bopo\x3105\x3106\x3107\x3108.tw", true},
163 // Latin + Bopomofo
164 {"xn--bopomofo-hj5gkalm.tw", L"bopomofo\x3105\x3106\x3107\x3108.tw", true},
165 // Bopomofo + Katakana
166 {"xn--lcka3d1bztghi.tw",
167 L"\x3105\x3106\x3107\x3108\x30ab\x30bf\x30ab\x30ca.tw", false},
168 // Bopomofo + Hangul
169 {"xn--5ekcde4543qbec.tw", L"\x3105\x3106\x3107\x3108\xc8fc\xc74c.tw", false},
170 // Devanagari + Latin
171 {"xn--ab-3ofh8fqbj6h.in", L"ab\x0939\x093f\x0928\x094d\x0926\x0940.in", true},
172 // Thai + Latin
173 {"xn--ab-jsi9al4bxdb6n.th",
174 L"ab\x0e20\x0e32\x0e29\x0e32\x0e44\x0e17\x0e22.th", true},
175 // <vitamin in Katakana>b1.com
176 {"xn--b1-xi4a7cvc9f.com", L"\x30d3\x30bf\x30df\x30f3" L"b1.com", true},
177 // Devanagari + Han
178 {"xn--t2bes3ds6749n.com", L"\x0930\x094b\x0932\x0947\x76e7\x0938.com", false},
179 // Devanagari + Bengali
180 {"xn--11b0x.in", L"\x0915\x0995.in", false},
181
182 // Invisibility check
183 // Thai tone mark malek(U+0E48) repeated
184 {"xn--03c0b3ca.th", L"\x0e23\x0e35\x0e48\x0e48.th", false},
185 // Acute accent repeated
186 {"xn--a-xbba.com", L"a\x0301\x0301.com", false},
187 // 'a' with acute accent + another acute accent
188 {"xn--1ca20i.com", L"\x00e1\x0301.com", false},
189
190 // Mixed script confusable
191 // google with Armenian Small Letter Oh(U+0585)
192 {"xn--gogle-lkg.com", L"g\x0585ogle.com", false},
193 // Hiragana HE(U+3078) mixed with Katakana
194 {"xn--49jxi3as0d0fpc.com",
195 L"\x30e2\x30d2\x30fc\x30c8\x3078\x30d6\x30f3.com", false},
196 // U+30FC + Han
197 {"xn--weka801xo02a.com", L"\x30fc\x52d5\x753b\x30fc.com", false},
198 // Han + U+30FC + Han
199 {"xn--wekz60nb2ay85atj0b.jp", L"\x65e5\x672c\x30fc\x91ce\x7403.jp", false},
200 // Latin + U+30FC + Latin
201 {"xn--abcdef-r64e.jp", L"abc\x30fc" L"def.jp", false},
202 // Latin + U+30FB + Latin
203 {"xn--abcdef-k64e.jp", L"abc\x30fb" L"def.jp", false},
204 // U+30FB + Latin
205 {"xn--abc-os4b.jp", L"\x30fb" L"abc.jp", false},
206
207 // Mixed digits: the first two will also fail mixed script test
208 // Latin + ASCII digit + Deva digit
209 {"xn--asc1deva-j0q.co.in", L"asc1deva\x0967.co.in", false},
210 // Latin + Deva digit + Beng digit
211 {"xn--devabeng-f0qu3f.co.in", L"deva\x0967" L"beng\x09e7.co.in", false},
212 // ASCII digit + Deva digit
213 {"xn--79-v5f.co.in", L"7\x09ea" L"9.co.in", false},
214 // Deva digit + Beng digit
215 {"xn--e4b0x.co.in", L"\x0967\x09e7.co.in", false},
216 // U+4E00 (CJK Ideograph One) is not a digit
217 {"xn--d12-s18d.cn", L"d12\x4e00.cn", true},
290 // One that's really long that will force a buffer realloc 218 // One that's really long that will force a buffer realloc
291 {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 219 {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
292 "aaaaaaa", 220 "aaaaaaa",
293 L"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 221 L"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
294 L"aaaaaaaa", 222 L"aaaaaaaa",
295 {true, true, true, true, true, 223 true},
296 true, true, true, true, true, 224
297 true, true, true, true, true, 225 // Not allowed; characters outside [:Identifier_Status=Allowed:]
298 true, true, true, true, true, 226 // Limited Use Scripts: UTS 31 Table 7.
299 true}}, 227 // Vai
300 // Test cases for characters we blacklisted although allowed in IDN. 228 {"xn--sn8a.com", L"\xa50b.com", false},
301 // Embedded spaces will be turned to %20 in the display. 229 // 'CARD' look-alike in Cherokee
302 // TODO(jungshik): We need to have more cases. This is a typical 230 {"xn--58db0a9q.com", L"\x13df\x13aa\x13a1\x13a0.com", false},
303 // data-driven trap. The following test cases need to be separated 231 // Scripts excluded from Identifiers: UTS 31 Table 4
304 // and tested only for a couple of languages. 232 // Coptic
305 {"xn--osd3820f24c.kr", L"\xac00\xb098\x115f.kr", 233 {"xn--5ya.com", L"\x03e7.com", false},
306 {false, false, false, false, false, 234 // Old Italic
307 false, false, false, false, false, 235 {"xn--097cc.com", L"\U00010300\U00010301.com", false},
308 false, false, false, false, false, 236
309 false, false, false, false, false, 237 // U+115F (Hangul Filler)
310 false}}, 238 {"xn--osd3820f24c.kr", L"\xac00\xb098\x115f.kr", false},
311 {"www.xn--google-ho0coa.com", L"www.\x2039google\x203a.com", 239 {"www.xn--google-ho0coa.com", L"www.\x2039google\x203a.com", false},
312 {false, false, false, false, false, 240 // Latin small capital w
313 false, false, false, false, false, 241 {"xn--hardare-l41c.com", L"hard\x1d21" L"are.com", false},
314 false, false, false, false, false, 242 // Minus Sign(U+2212)
315 false, false, false, false, false, 243 {"xn--t9g238xc2a.jp", L"\x65e5\x2212\x672c.jp", false},
316 }}, 244 // Latin Small Letter Script G
317 {"google.xn--comabc-k8d", L"google.com\x0338" L"abc", 245 {"xn--0naa.com", L"\x0261\x0261.com", false},
318 {false, false, false, false, false, 246 // Hangul Jamo(U+11xx)
319 false, false, false, false, false, 247 {"xn--0pdc3b.com", L"\x1102\x1103\x1110.com", false},
320 false, false, false, false, false, 248 // degree sign
321 false, false, false, false, false, 249 {"xn--36c-tfa.com", L"36\x00b0" L"c.com", false},
322 }}, 250 // Pound sign
323 {"google.xn--com-oh4ba.evil.jp", L"google.com\x309a\x309a.evil.jp", 251 {"xn--5free-9ga.com", L"5free\x00a8.com", false},
324 {false, false, false, false, false, 252 // Hebrew points (U+05B0, U+05B6)
325 false, false, false, false, false, 253 {"xn--7cbl2kc2a.com", L"\x05e1\x05b6\x05e7\x05b0\x05e1.com", false},
326 false, false, false, false, false, 254 // Danda(U+0964)
327 false, false, false, false, false, 255 {"xn--81bp1b6ch8s.com", L"\x0924\x093f\x091c\x0964\x0930\x0940.com", false},
328 }}, 256 // Small letter script G(U+0261)
329 {"google.xn--comevil-v04f.jp", L"google.com\x30ce" L"evil.jp", 257 {"xn--oogle-qmc.com", L"\x0261oogle.com", false},
330 {false, false, false, false, false, 258 // Small Katakana Extension(U+31F1)
331 false, false, false, false, false, 259 {"xn--wlk.com", L"\x31f1.com", false},
332 false, false, false, false, false, 260 // Heart symbol
333 false, false, false, false, false, 261 {"xn--ab-u0x.com", L"ab\x2665.com", false},
334 }}, 262 // Emoji
263 {"xn--vi8hiv.xyz", L"\U0001f355\U0001f4a9.xyz", false},
264 // Registered trade mark
265 {"xn--egistered-fna.com", L"\x00ae" L"egistered.com", false},
266 // Latin Letter Retroflex Click
267 {"xn--registered-25c.com", L"registered\x01c3.com", false},
268 // ASCII '!' not allowed in IDN
269 {"xn--!-257eu42c.kr", L"\xc548\xb155!.kr", false},
270 // 'GOOGLE' in IPA extension
271 {"xn--1naa7pn51hcbaa.com",
272 L"\x0262\x1d0f\x1d0f\x0262\x029f\x1d07.com", false},
335 // Padlock icon spoof. 273 // Padlock icon spoof.
336 {"xn--google-hj64e", L"\U0001f512google.com", 274 {"xn--google-hj64e", L"\U0001f512google.com", false},
337 {false, false, false, false, false, 275
338 false, false, false, false, false, 276 // Custom black list
339 false, false, false, false, false, 277 // Combining Long Solidus Overlay
340 false, false, false, false, false, 278 {"google.xn--comabc-k8d", L"google.com\x0338" L"abc", false},
341 }}, 279 // Hyphenation Point instead of Katakana Middle dot
342 // Ensure that blacklisting "\xd83d\xdd12" did not inadvertently blacklist 280 {"xn--svgy16dha.jp", L"\x30a1\x2027\x30a3.jp", false},
343 // all strings with the surrogate '\xdd12'. 281 // Gershayim with other Hebrew characters is allowed.
344 {"xn--fk9c.com", L"\U00010912.com", 282 {"xn--5db6bh9b.il", L"\x05e9\x05d1\x05f4\x05e6.il", true},
345 {true, false, false, false, false, 283 // Hebrew Gershayim with Latin is disallowed.
346 false, false, false, false, false, 284 {"xn--ab-yod.com", L"a\x05f4" L"b.com", false},
347 false, false, false, false, false, 285 // Hebrew Gershayim with Arabic is disallowed.
348 false, false, false, false, false, 286 {"xn--5eb7h.eg", L"\x0628\x05f4.eg", false},
349 }}, 287
350 #if 0 288 // Custom dangerous patterns
351 // These two cases are special. We need a separate test. 289 // Two Katakana-Hiragana combining mark in a row
352 // U+3000 and U+3002 are normalized to ASCII space and dot. 290 {"google.xn--com-oh4ba.evil.jp", L"google.com\x309a\x309a.evil.jp", false},
353 {"xn-- -kq6ay5z.cn", L"\x4e2d\x56fd\x3000.cn", 291 // Katakana Letter No not enclosed by {Han,Hiragana,Katakana}.
354 {false, false, true, false, false, 292 {"google.xn--comevil-v04f.jp", L"google.com\x30ce" L"evil.jp", false},
355 false, false, false, false, false, 293 // TODO(jshin): Review the danger of allowing the following two.
356 false, false, false, false, false, 294 // Katakana 'No' by itself is allowed.
357 false, false, true, false, false, 295 {"xn--ldk.jp", L"\x30ce.jp", true},
358 true}}, 296 // Hebrew Gershayim used by itself is allowed.
359 {"xn--fiqs8s.cn", L"\x4e2d\x56fd\x3002" L"cn", 297 {"xn--5eb.il", L"\x05f4.il", true},
360 {false, false, true, false, false, 298
361 false, false, false, false, false, 299 // 4 Deviation characters between IDNA 2003 and IDNA 2008
362 false, false, false, false, false, 300 // When entered in Unicode, the first two are mapped to 'ss' and Greek sigma
363 false, false, true, false, false, 301 // and the latter two are mapped away. However, the punycode form should
364 true}}, 302 // remain in punycode.
365 #endif 303 // U+00DF(sharp-s)
304 {"xn--fu-hia.de", L"fu\x00df.de", false},
305 // U+03C2(final-sigma)
306 {"xn--mxac2c.gr", L"\x03b1\x03b2\x03c2.gr", false},
307 // U+200C(ZWNJ)
308 {"xn--h2by8byc123p.in", L"\x0924\x094d\x200c\x0930\x093f.in", false},
309 // U+200D(ZWJ)
310 {"xn--11b6iy14e.in", L"\x0915\x094d\x200d.in", false},
311
312 // Math Monospace Small A. When entered in Unicode, it's canonicalized to
313 // 'a'. The punycode form should remain in punycode.
314 {"xn--bc-9x80a.xyz", L"\U0001d68a" L"bc.xyz", false},
315 // Math Sans Bold Capital Alpha
316 {"xn--bc-rg90a.xyz", L"\U0001d756" L"bc.xyz", false},
317 // U+3000 is canonicalized to a space(U+0020), but the punycode form
318 // should remain in punycode.
319 {"xn--p6j412gn7f.cn", L"\x4e2d\x56fd\x3000", false},
320 // U+3002 is canonicalized to ASCII fullstop(U+002E), but the punycode form
321 // should remain in punycode.
322 {"xn--r6j012gn7f.cn", L"\x4e2d\x56fd\x3002", false},
323 // Invalid punycode
324 // Has a codepoint beyond U+10FFFF.
325 {"xn--krank-kg706554a", nullptr, false},
326 // '?' in punycode.
327 {"xn--hello?world.com", nullptr, false},
328
329 // Not allowed in UTS46/IDNA 2008
330 // Georgian Capital Letter(U+10BD)
331 {"xn--1nd.com", L"\x10bd.com", false},
332 // 3rd and 4th characters are '-'.
333 {"xn-----8kci4dhsd", L"\x0440\x0443--\x0430\x0432\x0442\x043e", false},
334 // Leading combining mark
335 {"xn--72b.com", L"\x093e.com", false},
336 // BiDi check per IDNA 2008/UTS 46
337 // Cannot start with AN(Arabic-Indic Number)
338 {"xn--8hbae.eg", L"\x0662\x0660\x0660.eg", false},
339 // Cannot start with an RTL character and end with an LTR
340 {"xn--x-ymcov.eg", L"\x062c\x0627\x0631x.eg", false},
341 // Can start with an RTL character and end with EN(European Number)
342 {"xn--2-ymcov.eg", L"\x062c\x0627\x0631" L"2.eg", true},
343 // Can start with a RTL and end with AN
344 {"xn--mgbjq0r.eg", L"\x062c\x0627\x0631\x0662.eg", true},
366 }; 345 };
367 346
368 struct AdjustOffsetCase { 347 struct AdjustOffsetCase {
369 size_t input_offset; 348 size_t input_offset;
370 size_t output_offset; 349 size_t output_offset;
371 }; 350 };
372 351
373 struct UrlTestData { 352 struct UrlTestData {
374 const char* const description; 353 const char* const description;
375 const char* const input; 354 const char* const input;
376 const char* const languages;
377 FormatUrlTypes format_types; 355 FormatUrlTypes format_types;
378 net::UnescapeRule::Type escape_rules; 356 net::UnescapeRule::Type escape_rules;
379 const wchar_t* output; // Use |wchar_t| to handle Unicode constants easily. 357 const wchar_t* output; // Use |wchar_t| to handle Unicode constants easily.
380 size_t prefix_len; 358 size_t prefix_len;
381 }; 359 };
382 360
383 // A helper for IDN*{Fast,Slow}.
384 // Append "::<language list>" to |expected| and |actual| to make it
385 // easy to tell which sub-case fails without debugging.
386 void AppendLanguagesToOutputs(const char* languages,
387 base::string16* expected,
388 base::string16* actual) {
389 base::string16 to_append = ASCIIToUTF16("::") + ASCIIToUTF16(languages);
390 expected->append(to_append);
391 actual->append(to_append);
392 }
393
394 // A pair of helpers for the FormatUrlWithOffsets() test. 361 // A pair of helpers for the FormatUrlWithOffsets() test.
395 void VerboseExpect(size_t expected, 362 void VerboseExpect(size_t expected,
396 size_t actual, 363 size_t actual,
397 const std::string& original_url, 364 const std::string& original_url,
398 size_t position, 365 size_t position,
399 const base::string16& formatted_url) { 366 const base::string16& formatted_url) {
400 EXPECT_EQ(expected, actual) << "Original URL: " << original_url 367 EXPECT_EQ(expected, actual) << "Original URL: " << original_url
401 << " (at char " << position << ")\nFormatted URL: " << formatted_url; 368 << " (at char " << position << ")\nFormatted URL: " << formatted_url;
402 } 369 }
403 370
404 void CheckAdjustedOffsets(const std::string& url_string, 371 void CheckAdjustedOffsets(const std::string& url_string,
405 const std::string& languages,
406 FormatUrlTypes format_types, 372 FormatUrlTypes format_types,
407 net::UnescapeRule::Type unescape_rules, 373 net::UnescapeRule::Type unescape_rules,
408 const size_t* output_offsets) { 374 const size_t* output_offsets) {
409 GURL url(url_string); 375 GURL url(url_string);
410 size_t url_length = url_string.length(); 376 size_t url_length = url_string.length();
411 std::vector<size_t> offsets; 377 std::vector<size_t> offsets;
412 for (size_t i = 0; i <= url_length + 1; ++i) 378 for (size_t i = 0; i <= url_length + 1; ++i)
413 offsets.push_back(i); 379 offsets.push_back(i);
414 offsets.push_back(500000); // Something larger than any input length. 380 offsets.push_back(500000); // Something larger than any input length.
415 offsets.push_back(std::string::npos); 381 offsets.push_back(std::string::npos);
416 base::string16 formatted_url = FormatUrlWithOffsets(url, languages, 382 base::string16 formatted_url = FormatUrlWithOffsets(url, std::string(),
417 format_types, unescape_rules, NULL, NULL, &offsets); 383 format_types, unescape_rules, NULL, NULL, &offsets);
418 for (size_t i = 0; i < url_length; ++i) 384 for (size_t i = 0; i < url_length; ++i)
419 VerboseExpect(output_offsets[i], offsets[i], url_string, i, formatted_url); 385 VerboseExpect(output_offsets[i], offsets[i], url_string, i, formatted_url);
420 VerboseExpect(formatted_url.length(), offsets[url_length], url_string, 386 VerboseExpect(formatted_url.length(), offsets[url_length], url_string,
421 url_length, formatted_url); 387 url_length, formatted_url);
422 VerboseExpect(base::string16::npos, offsets[url_length + 1], url_string, 388 VerboseExpect(base::string16::npos, offsets[url_length + 1], url_string,
423 500000, formatted_url); 389 500000, formatted_url);
424 VerboseExpect(base::string16::npos, offsets[url_length + 2], url_string, 390 VerboseExpect(base::string16::npos, offsets[url_length + 2], url_string,
425 std::string::npos, formatted_url); 391 std::string::npos, formatted_url);
426 } 392 }
427 393
428 TEST(UrlFormatterTest, IDNToUnicodeFast) { 394 TEST(UrlFormatterTest, IDNToUnicode) {
429 for (size_t i = 0; i < arraysize(idn_cases); i++) { 395 for (size_t i = 0; i < arraysize(idn_cases); i++) {
430 for (size_t j = 0; j < arraysize(kLanguages); j++) { 396 base::string16 output(IDNToUnicode(idn_cases[i].input, std::string()));
431 // ja || zh-TW,en || ko,ja -> IDNToUnicodeSlow 397 base::string16 expected(idn_cases[i].unicode_allowed
432 if (j == 3 || j == 17 || j == 18) 398 ? WideToUTF16(idn_cases[i].unicode_output)
433 continue; 399 : ASCIIToUTF16(idn_cases[i].input));
434 base::string16 output(IDNToUnicode(idn_cases[i].input, kLanguages[j])); 400 EXPECT_EQ(expected, output) << "input # " << i << ": \""
435 base::string16 expected(idn_cases[i].unicode_allowed[j] ? 401 << idn_cases[i].input << "\"";
436 WideToUTF16(idn_cases[i].unicode_output) :
437 ASCIIToUTF16(idn_cases[i].input));
438 AppendLanguagesToOutputs(kLanguages[j], &expected, &output);
439 EXPECT_EQ(expected, output) << "input: \"" << idn_cases[i].input
440 << "\", languages: \"" << kLanguages[j]
441 << "\"";
442 }
443 }
444 }
445
446 TEST(UrlFormatterTest, IDNToUnicodeSlow) {
447 for (size_t i = 0; i < arraysize(idn_cases); i++) {
448 for (size_t j = 0; j < arraysize(kLanguages); j++) {
449 // !(ja || zh-TW,en || ko,ja) -> IDNToUnicodeFast
450 if (!(j == 3 || j == 17 || j == 18))
451 continue;
452 base::string16 output(IDNToUnicode(idn_cases[i].input, kLanguages[j]));
453 base::string16 expected(idn_cases[i].unicode_allowed[j] ?
454 WideToUTF16(idn_cases[i].unicode_output) :
455 ASCIIToUTF16(idn_cases[i].input));
456 AppendLanguagesToOutputs(kLanguages[j], &expected, &output);
457 EXPECT_EQ(expected, output) << "input: \"" << idn_cases[i].input
458 << "\", languages: \"" << kLanguages[j]
459 << "\"";
460 }
461 }
462 }
463
464 // ulocdata_getExemplarSet may fail with some locales (currently bn, gu, and
465 // te), which was causing a crash (See http://crbug.com/510551). This may be an
466 // icu bug, but regardless, that should not cause a crash.
467 TEST(UrlFormatterTest, IDNToUnicodeNeverCrashes) {
468 for (char c1 = 'a'; c1 <= 'z'; c1++) {
469 for (char c2 = 'a'; c2 <= 'z'; c2++) {
470 std::string lang = base::StringPrintf("%c%c", c1, c2);
471 base::string16 output(IDNToUnicode("xn--74h", lang));
472 }
473 } 402 }
474 } 403 }
475 404
476 TEST(UrlFormatterTest, FormatUrl) { 405 TEST(UrlFormatterTest, FormatUrl) {
477 FormatUrlTypes default_format_type = kFormatUrlOmitUsernamePassword; 406 FormatUrlTypes default_format_type = kFormatUrlOmitUsernamePassword;
478 const UrlTestData tests[] = { 407 const UrlTestData tests[] = {
479 {"Empty URL", "", "", default_format_type, net::UnescapeRule::NORMAL, L"", 408 {"Empty URL", "", default_format_type, net::UnescapeRule::NORMAL, L"",
480 0}, 409 0},
481 410
482 {"Simple URL", "http://www.google.com/", "", default_format_type, 411 {"Simple URL", "http://www.google.com/", default_format_type,
483 net::UnescapeRule::NORMAL, L"http://www.google.com/", 7}, 412 net::UnescapeRule::NORMAL, L"http://www.google.com/", 7},
484 413
485 {"With a port number and a reference", 414 {"With a port number and a reference",
486 "http://www.google.com:8080/#\xE3\x82\xB0", "", default_format_type, 415 "http://www.google.com:8080/#\xE3\x82\xB0", default_format_type,
487 net::UnescapeRule::NORMAL, L"http://www.google.com:8080/#\x30B0", 7}, 416 net::UnescapeRule::NORMAL, L"http://www.google.com:8080/#\x30B0", 7},
488 417
489 // -------- IDN tests -------- 418 // -------- IDN tests --------
490 {"Japanese IDN with ja", "http://xn--l8jvb1ey91xtjb.jp", "ja", 419 {"Japanese IDN with ja", "http://xn--l8jvb1ey91xtjb.jp",
491 default_format_type, net::UnescapeRule::NORMAL, 420 default_format_type, net::UnescapeRule::NORMAL,
492 L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7}, 421 L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7},
493 422
494 {"Japanese IDN with en", "http://xn--l8jvb1ey91xtjb.jp", "en", 423 {"mailto: with Japanese IDN", "mailto:foo@xn--l8jvb1ey91xtjb.jp",
495 default_format_type, net::UnescapeRule::NORMAL,
496 L"http://xn--l8jvb1ey91xtjb.jp/", 7},
497
498 {"Japanese IDN without any languages", "http://xn--l8jvb1ey91xtjb.jp", "",
499 default_format_type, net::UnescapeRule::NORMAL,
500 // Single script is safe for empty languages.
501 L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7},
502
503 {"mailto: with Japanese IDN", "mailto:foo@xn--l8jvb1ey91xtjb.jp", "ja",
504 default_format_type, net::UnescapeRule::NORMAL, 424 default_format_type, net::UnescapeRule::NORMAL,
505 // GURL doesn't assume an email address's domain part as a host name. 425 // GURL doesn't assume an email address's domain part as a host name.
506 L"mailto:foo@xn--l8jvb1ey91xtjb.jp", 7}, 426 L"mailto:foo@xn--l8jvb1ey91xtjb.jp", 7},
507 427
508 {"file: with Japanese IDN", "file://xn--l8jvb1ey91xtjb.jp/config.sys", 428 {"file: with Japanese IDN", "file://xn--l8jvb1ey91xtjb.jp/config.sys",
509 "ja", default_format_type, net::UnescapeRule::NORMAL, 429 default_format_type, net::UnescapeRule::NORMAL,
510 L"file://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 7}, 430 L"file://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 7},
511 431
512 {"ftp: with Japanese IDN", "ftp://xn--l8jvb1ey91xtjb.jp/config.sys", "ja", 432 {"ftp: with Japanese IDN", "ftp://xn--l8jvb1ey91xtjb.jp/config.sys",
513 default_format_type, net::UnescapeRule::NORMAL, 433 default_format_type, net::UnescapeRule::NORMAL,
514 L"ftp://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 6}, 434 L"ftp://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 6},
515 435
516 // -------- omit_username_password flag tests -------- 436 // -------- omit_username_password flag tests --------
517 {"With username and password, omit_username_password=false", 437 {"With username and password, omit_username_password=false",
518 "http://user:passwd@example.com/foo", "", kFormatUrlOmitNothing, 438 "http://user:passwd@example.com/foo", kFormatUrlOmitNothing,
519 net::UnescapeRule::NORMAL, L"http://user:passwd@example.com/foo", 19}, 439 net::UnescapeRule::NORMAL, L"http://user:passwd@example.com/foo", 19},
520 440
521 {"With username and password, omit_username_password=true", 441 {"With username and password, omit_username_password=true",
522 "http://user:passwd@example.com/foo", "", default_format_type, 442 "http://user:passwd@example.com/foo", default_format_type,
523 net::UnescapeRule::NORMAL, L"http://example.com/foo", 7}, 443 net::UnescapeRule::NORMAL, L"http://example.com/foo", 7},
524 444
525 {"With username and no password", "http://user@example.com/foo", "", 445 {"With username and no password", "http://user@example.com/foo",
526 default_format_type, net::UnescapeRule::NORMAL, 446 default_format_type, net::UnescapeRule::NORMAL,
527 L"http://example.com/foo", 7}, 447 L"http://example.com/foo", 7},
528 448
529 {"Just '@' without username and password", "http://@example.com/foo", "", 449 {"Just '@' without username and password", "http://@example.com/foo",
530 default_format_type, net::UnescapeRule::NORMAL, 450 default_format_type, net::UnescapeRule::NORMAL,
531 L"http://example.com/foo", 7}, 451 L"http://example.com/foo", 7},
532 452
533 // GURL doesn't think local-part of an email address is username for URL. 453 // GURL doesn't think local-part of an email address is username for URL.
534 {"mailto:, omit_username_password=true", "mailto:foo@example.com", "", 454 {"mailto:, omit_username_password=true", "mailto:foo@example.com",
535 default_format_type, net::UnescapeRule::NORMAL, 455 default_format_type, net::UnescapeRule::NORMAL,
536 L"mailto:foo@example.com", 7}, 456 L"mailto:foo@example.com", 7},
537 457
538 // -------- unescape flag tests -------- 458 // -------- unescape flag tests --------
539 {"Do not unescape", 459 {"Do not unescape",
540 "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/" 460 "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/"
541 "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB" 461 "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"
542 "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", 462 "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB",
543 "en", default_format_type, net::UnescapeRule::NONE, 463 default_format_type, net::UnescapeRule::NONE,
544 // GURL parses %-encoded hostnames into Punycode. 464 // GURL parses %-encoded hostnames into Punycode.
545 L"http://xn--qcka1pmc.jp/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB" 465 L"http://\x30B0\x30FC\x30B0\x30EB.jp/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"
546 L"?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", 466 L"?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB",
547 7}, 467 7},
548 468
549 {"Unescape normally", 469 {"Unescape normally",
550 "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/" 470 "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/"
551 "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB" 471 "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"
552 "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", 472 "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB",
553 "en", default_format_type, net::UnescapeRule::NORMAL, 473 default_format_type, net::UnescapeRule::NORMAL,
554 L"http://xn--qcka1pmc.jp/\x30B0\x30FC\x30B0\x30EB" 474 L"http://\x30B0\x30FC\x30B0\x30EB.jp/\x30B0\x30FC\x30B0\x30EB"
555 L"?q=\x30B0\x30FC\x30B0\x30EB", 475 L"?q=\x30B0\x30FC\x30B0\x30EB",
556 7}, 476 7},
557 477
558 {"Unescape normally with BiDi control character", 478 {"Unescape normally with BiDi control character",
559 "http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy", "en", 479 "http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy",
560 default_format_type, net::UnescapeRule::NORMAL, 480 default_format_type, net::UnescapeRule::NORMAL,
561 L"http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy", 7}, 481 L"http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy", 7},
562 482
563 {"Unescape normally including unescape spaces", 483 {"Unescape normally including unescape spaces",
564 "http://www.google.com/search?q=Hello%20World", "en", 484 "http://www.google.com/search?q=Hello%20World",
565 default_format_type, net::UnescapeRule::SPACES, 485 default_format_type, net::UnescapeRule::SPACES,
566 L"http://www.google.com/search?q=Hello World", 7}, 486 L"http://www.google.com/search?q=Hello World", 7},
567 487
568 /* 488 /*
569 {"unescape=true with some special characters", 489 {"unescape=true with some special characters",
570 "http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", "", 490 "http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z",
571 kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, 491 kFormatUrlOmitNothing, net::UnescapeRule::NORMAL,
572 L"http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", 25}, 492 L"http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", 25},
573 */ 493 */
574 // Disabled: the resultant URL becomes "...user%253A:%2540passwd...". 494 // Disabled: the resultant URL becomes "...user%253A:%2540passwd...".
575 495
576 // -------- omit http: -------- 496 // -------- omit http: --------
577 {"omit http with user name", "http://user@example.com/foo", "", 497 {"omit http with user name", "http://user@example.com/foo",
578 kFormatUrlOmitAll, net::UnescapeRule::NORMAL, L"example.com/foo", 0}, 498 kFormatUrlOmitAll, net::UnescapeRule::NORMAL, L"example.com/foo", 0},
579 499
580 {"omit http", "http://www.google.com/", "en", kFormatUrlOmitHTTP, 500 {"omit http", "http://www.google.com/", kFormatUrlOmitHTTP,
581 net::UnescapeRule::NORMAL, L"www.google.com/", 0}, 501 net::UnescapeRule::NORMAL, L"www.google.com/", 0},
582 502
583 {"omit http with https", "https://www.google.com/", "en", 503 {"omit http with https", "https://www.google.com/",
584 kFormatUrlOmitHTTP, net::UnescapeRule::NORMAL, 504 kFormatUrlOmitHTTP, net::UnescapeRule::NORMAL,
585 L"https://www.google.com/", 8}, 505 L"https://www.google.com/", 8},
586 506
587 {"omit http starts with ftp.", "http://ftp.google.com/", "en", 507 {"omit http starts with ftp.", "http://ftp.google.com/",
588 kFormatUrlOmitHTTP, net::UnescapeRule::NORMAL, L"http://ftp.google.com/", 508 kFormatUrlOmitHTTP, net::UnescapeRule::NORMAL, L"http://ftp.google.com/",
589 7}, 509 7},
590 510
591 // -------- omit trailing slash on bare hostname -------- 511 // -------- omit trailing slash on bare hostname --------
592 {"omit slash when it's the entire path", "http://www.google.com/", "en", 512 {"omit slash when it's the entire path", "http://www.google.com/",
593 kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL, 513 kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL,
594 L"http://www.google.com", 7}, 514 L"http://www.google.com", 7},
595 {"omit slash when there's a ref", "http://www.google.com/#ref", "en", 515 {"omit slash when there's a ref", "http://www.google.com/#ref",
596 kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL, 516 kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL,
597 L"http://www.google.com/#ref", 7}, 517 L"http://www.google.com/#ref", 7},
598 {"omit slash when there's a query", "http://www.google.com/?", "en", 518 {"omit slash when there's a query", "http://www.google.com/?",
599 kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL, 519 kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL,
600 L"http://www.google.com/?", 7}, 520 L"http://www.google.com/?", 7},
601 {"omit slash when it's not the entire path", "http://www.google.com/foo", 521 {"omit slash when it's not the entire path", "http://www.google.com/foo",
602 "en", kFormatUrlOmitTrailingSlashOnBareHostname, 522 kFormatUrlOmitTrailingSlashOnBareHostname,
603 net::UnescapeRule::NORMAL, L"http://www.google.com/foo", 7}, 523 net::UnescapeRule::NORMAL, L"http://www.google.com/foo", 7},
604 {"omit slash for nonstandard URLs", "data:/", "en", 524 {"omit slash for nonstandard URLs", "data:/",
605 kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL, 525 kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL,
606 L"data:/", 5}, 526 L"data:/", 5},
607 {"omit slash for file URLs", "file:///", "en", 527 {"omit slash for file URLs", "file:///",
608 kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL, 528 kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL,
609 L"file:///", 7}, 529 L"file:///", 7},
610 530
611 // -------- view-source: -------- 531 // -------- view-source: --------
612 {"view-source", "view-source:http://xn--qcka1pmc.jp/", "ja", 532 {"view-source", "view-source:http://xn--qcka1pmc.jp/",
613 default_format_type, net::UnescapeRule::NORMAL, 533 default_format_type, net::UnescapeRule::NORMAL,
614 L"view-source:http://\x30B0\x30FC\x30B0\x30EB.jp/", 19}, 534 L"view-source:http://\x30B0\x30FC\x30B0\x30EB.jp/", 19},
615 535
616 {"view-source of view-source", 536 {"view-source of view-source",
617 "view-source:view-source:http://xn--qcka1pmc.jp/", "ja", 537 "view-source:view-source:http://xn--qcka1pmc.jp/",
618 default_format_type, net::UnescapeRule::NORMAL, 538 default_format_type, net::UnescapeRule::NORMAL,
619 L"view-source:view-source:http://xn--qcka1pmc.jp/", 12}, 539 L"view-source:view-source:http://xn--qcka1pmc.jp/", 12},
620 540
621 // view-source should omit http and trailing slash where non-view-source 541 // view-source should omit http and trailing slash where non-view-source
622 // would. 542 // would.
623 {"view-source omit http", "view-source:http://a.b/c", "en", 543 {"view-source omit http", "view-source:http://a.b/c",
624 kFormatUrlOmitAll, net::UnescapeRule::NORMAL, L"view-source:a.b/c", 12}, 544 kFormatUrlOmitAll, net::UnescapeRule::NORMAL, L"view-source:a.b/c", 12},
625 {"view-source omit http starts with ftp.", "view-source:http://ftp.b/c", 545 {"view-source omit http starts with ftp.", "view-source:http://ftp.b/c",
626 "en", kFormatUrlOmitAll, net::UnescapeRule::NORMAL, 546 kFormatUrlOmitAll, net::UnescapeRule::NORMAL,
627 L"view-source:http://ftp.b/c", 19}, 547 L"view-source:http://ftp.b/c", 19},
628 {"view-source omit slash when it's the entire path", 548 {"view-source omit slash when it's the entire path",
629 "view-source:http://a.b/", "en", kFormatUrlOmitAll, 549 "view-source:http://a.b/", kFormatUrlOmitAll,
630 net::UnescapeRule::NORMAL, L"view-source:a.b", 12}, 550 net::UnescapeRule::NORMAL, L"view-source:a.b", 12},
631 }; 551 };
632 552
633 for (size_t i = 0; i < arraysize(tests); ++i) { 553 for (size_t i = 0; i < arraysize(tests); ++i) {
634 size_t prefix_len; 554 size_t prefix_len;
635 base::string16 formatted = FormatUrl( 555 base::string16 formatted = FormatUrl(
636 GURL(tests[i].input), tests[i].languages, tests[i].format_types, 556 GURL(tests[i].input), std::string(), tests[i].format_types,
637 tests[i].escape_rules, NULL, &prefix_len, NULL); 557 tests[i].escape_rules, NULL, &prefix_len, NULL);
638 EXPECT_EQ(WideToUTF16(tests[i].output), formatted) << tests[i].description; 558 EXPECT_EQ(WideToUTF16(tests[i].output), formatted) << tests[i].description;
639 EXPECT_EQ(tests[i].prefix_len, prefix_len) << tests[i].description; 559 EXPECT_EQ(tests[i].prefix_len, prefix_len) << tests[i].description;
640 } 560 }
641 } 561 }
642 562
643 TEST(UrlFormatterTest, FormatUrlParsed) { 563 TEST(UrlFormatterTest, FormatUrlParsed) {
644 // No unescape case. 564 // No unescape case.
645 url::Parsed parsed; 565 url::Parsed parsed;
646 base::string16 formatted = 566 base::string16 formatted =
647 FormatUrl(GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/" 567 FormatUrl(GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/"
648 "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"), 568 "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"),
649 "ja", kFormatUrlOmitNothing, net::UnescapeRule::NONE, &parsed, 569 std::string(), kFormatUrlOmitNothing, net::UnescapeRule::NONE,
650 NULL, NULL); 570 &parsed, NULL, NULL);
651 EXPECT_EQ(WideToUTF16( 571 EXPECT_EQ(WideToUTF16(
652 L"http://%E3%82%B0:%E3%83%BC@\x30B0\x30FC\x30B0\x30EB.jp:8080" 572 L"http://%E3%82%B0:%E3%83%BC@\x30B0\x30FC\x30B0\x30EB.jp:8080"
653 L"/%E3%82%B0/?q=%E3%82%B0#\x30B0"), formatted); 573 L"/%E3%82%B0/?q=%E3%82%B0#\x30B0"), formatted);
654 EXPECT_EQ(WideToUTF16(L"%E3%82%B0"), 574 EXPECT_EQ(WideToUTF16(L"%E3%82%B0"),
655 formatted.substr(parsed.username.begin, parsed.username.len)); 575 formatted.substr(parsed.username.begin, parsed.username.len));
656 EXPECT_EQ(WideToUTF16(L"%E3%83%BC"), 576 EXPECT_EQ(WideToUTF16(L"%E3%83%BC"),
657 formatted.substr(parsed.password.begin, parsed.password.len)); 577 formatted.substr(parsed.password.begin, parsed.password.len));
658 EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"), 578 EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"),
659 formatted.substr(parsed.host.begin, parsed.host.len)); 579 formatted.substr(parsed.host.begin, parsed.host.len));
660 EXPECT_EQ(WideToUTF16(L"8080"), 580 EXPECT_EQ(WideToUTF16(L"8080"),
(...skipping 194 matching lines...) Expand 10 before | Expand all | Expand 10 after
855 if (test_char && 775 if (test_char &&
856 strchr(kUnescapedCharacters, static_cast<char>(test_char))) { 776 strchr(kUnescapedCharacters, static_cast<char>(test_char))) {
857 EXPECT_NE(url.spec(), GURL(formatted).spec()); 777 EXPECT_NE(url.spec(), GURL(formatted).spec());
858 } else { 778 } else {
859 EXPECT_EQ(url.spec(), GURL(formatted).spec()); 779 EXPECT_EQ(url.spec(), GURL(formatted).spec());
860 } 780 }
861 } 781 }
862 } 782 }
863 783
864 TEST(UrlFormatterTest, FormatUrlWithOffsets) { 784 TEST(UrlFormatterTest, FormatUrlWithOffsets) {
865 CheckAdjustedOffsets(std::string(), "en", kFormatUrlOmitNothing, 785 CheckAdjustedOffsets(std::string(), kFormatUrlOmitNothing,
866 net::UnescapeRule::NORMAL, NULL); 786 net::UnescapeRule::NORMAL, NULL);
867 787
868 const size_t basic_offsets[] = { 788 const size_t basic_offsets[] = {
869 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 789 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
870 21, 22, 23, 24, 25 790 21, 22, 23, 24, 25
871 }; 791 };
872 CheckAdjustedOffsets("http://www.google.com/foo/", "en", 792 CheckAdjustedOffsets("http://www.google.com/foo/",
873 kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, 793 kFormatUrlOmitNothing, net::UnescapeRule::NORMAL,
874 basic_offsets); 794 basic_offsets);
875 795
876 const size_t omit_auth_offsets_1[] = { 796 const size_t omit_auth_offsets_1[] = {
877 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 7, 797 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 7,
878 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21 798 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
879 }; 799 };
880 CheckAdjustedOffsets("http://foo:bar@www.google.com/", "en", 800 CheckAdjustedOffsets("http://foo:bar@www.google.com/",
881 kFormatUrlOmitUsernamePassword, 801 kFormatUrlOmitUsernamePassword,
882 net::UnescapeRule::NORMAL, omit_auth_offsets_1); 802 net::UnescapeRule::NORMAL, omit_auth_offsets_1);
883 803
884 const size_t omit_auth_offsets_2[] = { 804 const size_t omit_auth_offsets_2[] = {
885 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, 7, 8, 9, 10, 11, 12, 13, 14, 805 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, 7, 8, 9, 10, 11, 12, 13, 14,
886 15, 16, 17, 18, 19, 20, 21 806 15, 16, 17, 18, 19, 20, 21
887 }; 807 };
888 CheckAdjustedOffsets("http://foo@www.google.com/", "en", 808 CheckAdjustedOffsets("http://foo@www.google.com/",
889 kFormatUrlOmitUsernamePassword, 809 kFormatUrlOmitUsernamePassword,
890 net::UnescapeRule::NORMAL, omit_auth_offsets_2); 810 net::UnescapeRule::NORMAL, omit_auth_offsets_2);
891 811
892 const size_t dont_omit_auth_offsets[] = { 812 const size_t dont_omit_auth_offsets[] = {
893 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 813 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
894 kNpos, kNpos, 11, 12, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 814 kNpos, kNpos, 11, 12, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
895 kNpos, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 815 kNpos, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
896 30, 31 816 30, 31
897 }; 817 };
898 // Unescape to "http://foo\x30B0:\x30B0bar@www.google.com". 818 // Unescape to "http://foo\x30B0:\x30B0bar@www.google.com".
899 CheckAdjustedOffsets("http://foo%E3%82%B0:%E3%82%B0bar@www.google.com/", "en", 819 CheckAdjustedOffsets("http://foo%E3%82%B0:%E3%82%B0bar@www.google.com/",
900 kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, 820 kFormatUrlOmitNothing, net::UnescapeRule::NORMAL,
901 dont_omit_auth_offsets); 821 dont_omit_auth_offsets);
902 822
903 const size_t view_source_offsets[] = { 823 const size_t view_source_offsets[] = {
904 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, kNpos, 824 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, kNpos,
905 kNpos, kNpos, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33 825 kNpos, kNpos, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33
906 }; 826 };
907 CheckAdjustedOffsets("view-source:http://foo@www.google.com/", "en", 827 CheckAdjustedOffsets("view-source:http://foo@www.google.com/",
908 kFormatUrlOmitUsernamePassword, 828 kFormatUrlOmitUsernamePassword,
909 net::UnescapeRule::NORMAL, view_source_offsets); 829 net::UnescapeRule::NORMAL, view_source_offsets);
910 830
911 const size_t idn_hostname_offsets_1[] = { 831 const size_t idn_hostname_offsets_1[] = {
912 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 832 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
913 kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 12, 833 kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 12,
914 13, 14, 15, 16, 17, 18, 19 834 13, 14, 15, 16, 17, 18, 19
915 }; 835 };
916 // Convert punycode to "http://\x671d\x65e5\x3042\x3055\x3072.jp/foo/". 836 // Convert punycode to "http://\x671d\x65e5\x3042\x3055\x3072.jp/foo/".
917 CheckAdjustedOffsets("http://xn--l8jvb1ey91xtjb.jp/foo/", "ja", 837 CheckAdjustedOffsets("http://xn--l8jvb1ey91xtjb.jp/foo/",
918 kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, 838 kFormatUrlOmitNothing, net::UnescapeRule::NORMAL,
919 idn_hostname_offsets_1); 839 idn_hostname_offsets_1);
920 840
921 const size_t idn_hostname_offsets_2[] = { 841 const size_t idn_hostname_offsets_2[] = {
922 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, kNpos, kNpos, kNpos, kNpos, kNpos, 842 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, kNpos, kNpos, kNpos, kNpos, kNpos,
923 kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 14, 15, kNpos, kNpos, kNpos, 843 kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 14, 15, kNpos, kNpos, kNpos,
924 kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 844 kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
925 kNpos, 19, 20, 21, 22, 23, 24 845 kNpos, 19, 20, 21, 22, 23, 24
926 }; 846 };
927 // Convert punycode to 847 // Convert punycode to
928 // "http://test.\x89c6\x9891.\x5317\x4eac\x5927\x5b78.test/". 848 // "http://test.\x89c6\x9891.\x5317\x4eac\x5927\x5b78.test/".
929 CheckAdjustedOffsets("http://test.xn--cy2a840a.xn--1lq90ic7f1rc.test/", 849 CheckAdjustedOffsets("http://test.xn--cy2a840a.xn--1lq90ic7f1rc.test/",
930 "zh-CN", kFormatUrlOmitNothing, 850 kFormatUrlOmitNothing,
931 net::UnescapeRule::NORMAL, idn_hostname_offsets_2); 851 net::UnescapeRule::NORMAL, idn_hostname_offsets_2);
932 852
933 const size_t unescape_offsets[] = { 853 const size_t unescape_offsets[] = {
934 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 854 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
935 21, 22, 23, 24, 25, kNpos, kNpos, 26, 27, 28, 29, 30, kNpos, kNpos, kNpos, 855 21, 22, 23, 24, 25, kNpos, kNpos, 26, 27, 28, 29, 30, kNpos, kNpos, kNpos,
936 kNpos, kNpos, kNpos, kNpos, kNpos, 31, kNpos, kNpos, kNpos, kNpos, kNpos, 856 kNpos, kNpos, kNpos, kNpos, kNpos, 31, kNpos, kNpos, kNpos, kNpos, kNpos,
937 kNpos, kNpos, kNpos, 32, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 857 kNpos, kNpos, kNpos, 32, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
938 kNpos, 33, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos 858 kNpos, 33, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos
939 }; 859 };
940 // Unescape to "http://www.google.com/foo bar/\x30B0\x30FC\x30B0\x30EB". 860 // Unescape to "http://www.google.com/foo bar/\x30B0\x30FC\x30B0\x30EB".
941 CheckAdjustedOffsets( 861 CheckAdjustedOffsets(
942 "http://www.google.com/foo%20bar/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", 862 "http://www.google.com/foo%20bar/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB",
943 "en", kFormatUrlOmitNothing, net::UnescapeRule::SPACES, unescape_offsets); 863 kFormatUrlOmitNothing, net::UnescapeRule::SPACES, unescape_offsets);
944 864
945 const size_t ref_offsets[] = { 865 const size_t ref_offsets[] = {
946 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 866 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
947 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, kNpos, kNpos, 32, kNpos, kNpos, 867 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, kNpos, kNpos, 32, kNpos, kNpos,
948 33 868 33
949 }; 869 };
950 // Unescape to "http://www.google.com/foo.html#\x30B0\x30B0z". 870 // Unescape to "http://www.google.com/foo.html#\x30B0\x30B0z".
951 CheckAdjustedOffsets( 871 CheckAdjustedOffsets(
952 "http://www.google.com/foo.html#\xE3\x82\xB0\xE3\x82\xB0z", "en", 872 "http://www.google.com/foo.html#\xE3\x82\xB0\xE3\x82\xB0z",
953 kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, ref_offsets); 873 kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, ref_offsets);
954 874
955 const size_t omit_http_offsets[] = { 875 const size_t omit_http_offsets[] = {
956 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 876 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
957 10, 11, 12, 13, 14 877 10, 11, 12, 13, 14
958 }; 878 };
959 CheckAdjustedOffsets("http://www.google.com/", "en", kFormatUrlOmitHTTP, 879 CheckAdjustedOffsets("http://www.google.com/", kFormatUrlOmitHTTP,
960 net::UnescapeRule::NORMAL, omit_http_offsets); 880 net::UnescapeRule::NORMAL, omit_http_offsets);
961 881
962 const size_t omit_http_start_with_ftp_offsets[] = { 882 const size_t omit_http_start_with_ftp_offsets[] = {
963 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21 883 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
964 }; 884 };
965 CheckAdjustedOffsets("http://ftp.google.com/", "en", kFormatUrlOmitHTTP, 885 CheckAdjustedOffsets("http://ftp.google.com/", kFormatUrlOmitHTTP,
966 net::UnescapeRule::NORMAL, 886 net::UnescapeRule::NORMAL,
967 omit_http_start_with_ftp_offsets); 887 omit_http_start_with_ftp_offsets);
968 888
969 const size_t omit_all_offsets[] = { 889 const size_t omit_all_offsets[] = {
970 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, kNpos, kNpos, kNpos, kNpos, 890 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, kNpos, kNpos, kNpos, kNpos,
971 0, 1, 2, 3, 4, 5, 6, 7 891 0, 1, 2, 3, 4, 5, 6, 7
972 }; 892 };
973 CheckAdjustedOffsets("http://user@foo.com/", "en", kFormatUrlOmitAll, 893 CheckAdjustedOffsets("http://user@foo.com/", kFormatUrlOmitAll,
974 net::UnescapeRule::NORMAL, omit_all_offsets); 894 net::UnescapeRule::NORMAL, omit_all_offsets);
975 } 895 }
976 896
977 } // namespace 897 } // namespace
978 898
979 } // namespace url_formatter 899 } // namespace url_formatter
OLDNEW
« no previous file with comments | « components/url_formatter/url_formatter.cc ('k') | url/url_canon_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698