OLD | NEW |
1 // Copyright 2015 The Chromium Authors. All rights reserved. | 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "components/url_formatter/url_formatter.h" | 5 #include "components/url_formatter/url_formatter.h" |
6 | 6 |
7 #include <stddef.h> | 7 #include <stddef.h> |
8 #include <string.h> | 8 #include <string.h> |
9 | 9 |
10 #include <vector> | 10 #include <vector> |
11 | 11 |
12 #include "base/macros.h" | 12 #include "base/macros.h" |
13 #include "base/strings/string_number_conversions.h" | 13 #include "base/strings/string_number_conversions.h" |
14 #include "base/strings/stringprintf.h" | 14 #include "base/strings/stringprintf.h" |
15 #include "base/strings/utf_string_conversions.h" | 15 #include "base/strings/utf_string_conversions.h" |
16 #include "testing/gtest/include/gtest/gtest.h" | 16 #include "testing/gtest/include/gtest/gtest.h" |
17 #include "url/gurl.h" | 17 #include "url/gurl.h" |
18 | 18 |
19 | 19 |
20 namespace url_formatter { | 20 namespace url_formatter { |
21 | 21 |
22 namespace { | 22 namespace { |
23 | 23 |
24 using base::WideToUTF16; | 24 using base::WideToUTF16; |
25 using base::ASCIIToUTF16; | 25 using base::ASCIIToUTF16; |
26 | 26 |
27 const size_t kNpos = base::string16::npos; | 27 const size_t kNpos = base::string16::npos; |
28 | 28 |
29 const char* const kLanguages[] = { | |
30 "", "en", "zh-CN", "ja", "ko", | |
31 "he", "ar", "ru", "el", "fr", | |
32 "de", "pt", "sv", "th", "hi", | |
33 "de,en", "el,en", "zh-TW,en", "ko,ja", "he,ru,en", | |
34 "zh,ru,en" | |
35 }; | |
36 | |
37 struct IDNTestCase { | 29 struct IDNTestCase { |
38 const char* const input; | 30 const char* const input; |
39 const wchar_t* unicode_output; | 31 const wchar_t* unicode_output; |
40 const bool unicode_allowed[arraysize(kLanguages)]; | 32 const bool unicode_allowed; |
41 }; | 33 }; |
42 | 34 |
43 // TODO(jungshik) This is just a random sample of languages and is far | |
44 // from exhaustive. We may have to generate all the combinations | |
45 // of languages (powerset of a set of all the languages). | |
46 const IDNTestCase idn_cases[] = { | 35 const IDNTestCase idn_cases[] = { |
47 // No IDN | 36 // No IDN |
48 {"www.google.com", L"www.google.com", | 37 {"www.google.com", L"www.google.com", true}, |
49 {true, true, true, true, true, | 38 {"www.google.com.", L"www.google.com.", true}, |
50 true, true, true, true, true, | 39 {".", L".", true}, |
51 true, true, true, true, true, | 40 {"", L"", true}, |
52 true, true, true, true, true, | |
53 true}}, | |
54 {"www.google.com.", L"www.google.com.", | |
55 {true, true, true, true, true, | |
56 true, true, true, true, true, | |
57 true, true, true, true, true, | |
58 true, true, true, true, true, | |
59 true}}, | |
60 {".", L".", | |
61 {true, true, true, true, true, | |
62 true, true, true, true, true, | |
63 true, true, true, true, true, | |
64 true, true, true, true, true, | |
65 true}}, | |
66 {"", L"", | |
67 {true, true, true, true, true, | |
68 true, true, true, true, true, | |
69 true, true, true, true, true, | |
70 true, true, true, true, true, | |
71 true}}, | |
72 // IDN | 41 // IDN |
73 // Hanzi (Traditional Chinese) | 42 // Hanzi (Traditional Chinese) |
74 {"xn--1lq90ic7f1rc.cn", L"\x5317\x4eac\x5927\x5b78.cn", | 43 {"xn--1lq90ic7f1rc.cn", L"\x5317\x4eac\x5927\x5b78.cn", true}, |
75 {true, false, true, true, false, | 44 // Hanzi ('video' in Simplified Chinese) |
76 false, false, false, false, false, | 45 {"xn--cy2a840a.com", L"\x89c6\x9891.com", true}, |
77 false, false, false, false, false, | |
78 false, false, true, true, false, | |
79 true}}, | |
80 // Hanzi ('video' in Simplified Chinese : will pass only in zh-CN,zh) | |
81 {"xn--cy2a840a.com", L"\x89c6\x9891.com", | |
82 {true, false, true, false, false, | |
83 false, false, false, false, false, | |
84 false, false, false, false, false, | |
85 false, false, false, false, false, | |
86 true}}, | |
87 // Hanzi + '123' | 46 // Hanzi + '123' |
88 {"www.xn--123-p18d.com", L"www.\x4e00" L"123.com", | 47 {"www.xn--123-p18d.com", L"www.\x4e00" L"123.com", true}, |
89 {true, false, true, true, false, | 48 // Hanzi + Latin : U+56FD is simplified |
90 false, false, false, false, false, | 49 {"www.xn--hello-9n1hm04c.com", L"www.hello\x4e2d\x56fd.com", true}, |
91 false, false, false, false, false, | |
92 false, false, true, true, false, | |
93 true}}, | |
94 // Hanzi + Latin : U+56FD is simplified and is regarded | |
95 // as not supported in zh-TW. | |
96 {"www.xn--hello-9n1hm04c.com", L"www.hello\x4e2d\x56fd.com", | |
97 {false, false, true, true, false, | |
98 false, false, false, false, false, | |
99 false, false, false, false, false, | |
100 false, false, false, true, false, | |
101 true}}, | |
102 // Kanji + Kana (Japanese) | 50 // Kanji + Kana (Japanese) |
103 {"xn--l8jvb1ey91xtjb.jp", L"\x671d\x65e5\x3042\x3055\x3072.jp", | 51 {"xn--l8jvb1ey91xtjb.jp", L"\x671d\x65e5\x3042\x3055\x3072.jp", true}, |
104 {true, false, false, true, false, | |
105 false, false, false, false, false, | |
106 false, false, false, false, false, | |
107 false, false, false, true, false, | |
108 false}}, | |
109 // Katakana including U+30FC | 52 // Katakana including U+30FC |
110 {"xn--tckm4i2e.jp", L"\x30b3\x30de\x30fc\x30b9.jp", | 53 {"xn--tckm4i2e.jp", L"\x30b3\x30de\x30fc\x30b9.jp", true}, |
111 {true, false, false, true, false, | 54 {"xn--3ck7a7g.jp", L"\u30ce\u30f3\u30bd.jp", true}, |
112 false, false, false, false, false, | |
113 false, false, false, false, false, | |
114 false, false, false, true, false, | |
115 }}, | |
116 {"xn--3ck7a7g.jp", L"\u30ce\u30f3\u30bd.jp", | |
117 {true, false, false, true, false, | |
118 false, false, false, false, false, | |
119 false, false, false, false, false, | |
120 false, false, false, true, false, | |
121 }}, | |
122 // Katakana + Latin (Japanese) | 55 // Katakana + Latin (Japanese) |
123 // TODO(jungshik): Change 'false' in the first element to 'true' | 56 {"xn--e-efusa1mzf.jp", L"e\x30b3\x30de\x30fc\x30b9.jp", true}, |
124 // after upgrading to ICU 4.2.1 to use new uspoof_* APIs instead | 57 {"xn--3bkxe.jp", L"\x30c8\x309a.jp", true}, |
125 // of our IsIDNComponentInSingleScript(). | |
126 {"xn--e-efusa1mzf.jp", L"e\x30b3\x30de\x30fc\x30b9.jp", | |
127 {false, false, false, true, false, | |
128 false, false, false, false, false, | |
129 false, false, false, false, false, | |
130 false, false, false, true, false, | |
131 }}, | |
132 {"xn--3bkxe.jp", L"\x30c8\x309a.jp", | |
133 {false, false, false, true, false, | |
134 false, false, false, false, false, | |
135 false, false, false, false, false, | |
136 false, false, false, true, false, | |
137 }}, | |
138 // Hangul (Korean) | 58 // Hangul (Korean) |
139 {"www.xn--or3b17p6jjc.kr", L"www.\xc804\xc790\xc815\xbd80.kr", | 59 {"www.xn--or3b17p6jjc.kr", L"www.\xc804\xc790\xc815\xbd80.kr", true}, |
140 {true, false, false, false, true, | |
141 false, false, false, false, false, | |
142 false, false, false, false, false, | |
143 false, false, false, true, false, | |
144 false}}, | |
145 // b<u-umlaut>cher (German) | 60 // b<u-umlaut>cher (German) |
146 {"xn--bcher-kva.de", L"b\x00fc" L"cher.de", | 61 {"xn--bcher-kva.de", L"b\x00fc" L"cher.de", true}, |
147 {true, false, false, false, false, | |
148 false, false, false, false, true, | |
149 true, false, false, false, false, | |
150 true, false, false, false, false, | |
151 false}}, | |
152 // a with diaeresis | 62 // a with diaeresis |
153 {"www.xn--frgbolaget-q5a.se", L"www.f\x00e4rgbolaget.se", | 63 {"www.xn--frgbolaget-q5a.se", L"www.f\x00e4rgbolaget.se", true}, |
154 {true, false, false, false, false, | |
155 false, false, false, false, false, | |
156 true, false, true, false, false, | |
157 true, false, false, false, false, | |
158 false}}, | |
159 // c-cedilla (French) | 64 // c-cedilla (French) |
160 {"www.xn--alliancefranaise-npb.fr", L"www.alliancefran\x00e7" L"aise.fr", | 65 {"www.xn--alliancefranaise-npb.fr", |
161 {true, false, false, false, false, | 66 L"www.alliancefran\x00e7" L"aise.fr", true}, |
162 false, false, false, false, true, | |
163 false, true, false, false, false, | |
164 false, false, false, false, false, | |
165 false}}, | |
166 // caf'e with acute accent' (French) | 67 // caf'e with acute accent' (French) |
167 {"xn--caf-dma.fr", L"caf\x00e9.fr", | 68 {"xn--caf-dma.fr", L"caf\x00e9.fr", true}, |
168 {true, false, false, false, false, | |
169 false, false, false, false, true, | |
170 false, true, true, false, false, | |
171 false, false, false, false, false, | |
172 false}}, | |
173 // c-cedilla and a with tilde (Portuguese) | 69 // c-cedilla and a with tilde (Portuguese) |
174 {"xn--poema-9qae5a.com.br", L"p\x00e3oema\x00e7\x00e3.com.br", | 70 {"xn--poema-9qae5a.com.br", L"p\x00e3oema\x00e7\x00e3.com.br", true}, |
175 {true, false, false, false, false, | |
176 false, false, false, false, false, | |
177 false, true, false, false, false, | |
178 false, false, false, false, false, | |
179 false}}, | |
180 // s with caron | 71 // s with caron |
181 {"xn--achy-f6a.com", L"\x0161" L"achy.com", | 72 {"xn--achy-f6a.com", L"\x0161" L"achy.com", true}, |
182 {true, false, false, false, false, | |
183 false, false, false, false, false, | |
184 false, false, false, false, false, | |
185 false, false, false, false, false, | |
186 false}}, | |
187 // TODO(jungshik) : Add examples with Cyrillic letters | |
188 // only used in some languages written in Cyrillic. | |
189 // Eutopia (Greek) | |
190 {"xn--kxae4bafwg.gr", L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1.gr", | 73 {"xn--kxae4bafwg.gr", L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1.gr", |
191 {true, false, false, false, false, | 74 true}, |
192 false, false, false, true, false, | |
193 false, false, false, false, false, | |
194 false, true, false, false, false, | |
195 false}}, | |
196 // Eutopia + 123 (Greek) | 75 // Eutopia + 123 (Greek) |
197 {"xn---123-pldm0haj2bk.gr", | 76 {"xn---123-pldm0haj2bk.gr", |
198 L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1-123.gr", | 77 L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1-123.gr", true}, |
199 {true, false, false, false, false, | |
200 false, false, false, true, false, | |
201 false, false, false, false, false, | |
202 false, true, false, false, false, | |
203 false}}, | |
204 // Cyrillic (Russian) | 78 // Cyrillic (Russian) |
205 {"xn--n1aeec9b.ru", L"\x0442\x043e\x0440\x0442\x044b.ru", | 79 {"xn--n1aeec9b.ru", L"\x0442\x043e\x0440\x0442\x044b.ru", true}, |
206 {true, false, false, false, false, | |
207 false, false, true, false, false, | |
208 false, false, false, false, false, | |
209 false, false, false, false, true, | |
210 true}}, | |
211 // Cyrillic + 123 (Russian) | 80 // Cyrillic + 123 (Russian) |
212 {"xn---123-45dmmc5f.ru", L"\x0442\x043e\x0440\x0442\x044b-123.ru", | 81 {"xn---123-45dmmc5f.ru", L"\x0442\x043e\x0440\x0442\x044b-123.ru", true}, |
213 {true, false, false, false, false, | 82 // 'president' in Russian. Is a wholescript confusable, but allowed. |
214 false, false, true, false, false, | 83 {"xn--d1abbgf6aiiy.xn--p1ai", |
215 false, false, false, false, false, | 84 L"\x043f\x0440\x0435\x0437\x0438\x0434\x0435\x043d\x0442.\x0440\x0444", |
216 false, false, false, false, true, | 85 true}, |
217 true}}, | |
218 // Arabic | 86 // Arabic |
219 {"xn--mgba1fmg.ar", L"\x0627\x0641\x0644\x0627\x0645.ar", | 87 {"xn--mgba1fmg.eg", L"\x0627\x0641\x0644\x0627\x0645.eg", true}, |
220 {true, false, false, false, false, | |
221 false, true, false, false, false, | |
222 false, false, false, false, false, | |
223 false, false, false, false, false, | |
224 false}}, | |
225 // Hebrew | 88 // Hebrew |
226 {"xn--4dbib.he", L"\x05d5\x05d0\x05d4.he", | 89 {"xn--4dbib.he", L"\x05d5\x05d0\x05d4.he", true}, |
227 {true, false, false, false, false, | 90 // Hebrew + Common |
228 true, false, false, false, false, | 91 {"xn---123-ptf2c5c6bt.il", L"\x05e2\x05d1\x05e8\x05d9\x05ea-123.il", true}, |
229 false, false, false, false, false, | |
230 false, false, false, false, true, | |
231 false}}, | |
232 // Thai | 92 // Thai |
233 {"xn--12c2cc4ag3b4ccu.th", | 93 {"xn--12c2cc4ag3b4ccu.th", |
234 L"\x0e2a\x0e32\x0e22\x0e01\x0e32\x0e23\x0e1a\x0e34\x0e19.th", | 94 L"\x0e2a\x0e32\x0e22\x0e01\x0e32\x0e23\x0e1a\x0e34\x0e19.th", true}, |
235 {true, false, false, false, false, | 95 // Thai + Common |
236 false, false, false, false, false, | 96 {"xn---123-9goxcp8c9db2r.th", |
237 false, false, false, true, false, | 97 L"\x0e20\x0e32\x0e29\x0e32\x0e44\x0e17\x0e22-123.th", true}, |
238 false, false, false, false, false, | |
239 false}}, | |
240 // Devanagari (Hindi) | 98 // Devanagari (Hindi) |
241 {"www.xn--l1b6a9e1b7c.in", L"www.\x0905\x0915\x094b\x0932\x093e.in", | 99 {"www.xn--l1b6a9e1b7c.in", L"www.\x0905\x0915\x094b\x0932\x093e.in", true}, |
242 {true, false, false, false, false, | 100 // Devanagari + Common |
243 false, false, false, false, false, | 101 {"xn---123-kbjl2j0bl2k.in", |
244 false, false, false, false, true, | 102 L"\x0939\x093f\x0928\x094d\x0926\x0940-123.in", true}, |
245 false, false, false, false, false, | 103 |
246 false}}, | 104 // 5 Aspirational scripts |
247 // Invalid IDN | 105 // Unified Canadian Syllabary |
248 {"xn--hello?world.com", NULL, | 106 {"xn--dfe0tte.ca", L"\x1456\x14c2\x14ef.ca", true}, |
249 {false, false, false, false, false, | 107 // Tifinagh |
250 false, false, false, false, false, | 108 {"xn--4ljxa2bb4a6bxb.ma", |
251 false, false, false, false, false, | 109 L"\x2d5c\x2d49\x2d3c\x2d49\x2d4f\x2d30\x2d56.ma", true}, |
252 false, false, false, false, false, | 110 // Tifinagh with a disallowed character(U+2D6F) |
253 false}}, | 111 {"xn--hmjzaby5d5f.ma", L"\x2d5c\x2d49\x2d3c\x2d6f\x2d49\x2d4f.ma", false}, |
254 // Unsafe IDNs | 112 // Yi |
| 113 {"xn--4o7a6e1x64c.cn", L"\xa188\xa320\xa071\xa0b7.cn", true}, |
| 114 // Mongolian - 'ordu' (place, camp) |
| 115 {"xn--56ec8bp.cn", L"\x1823\x1837\x1833\x1824.cn", true}, |
| 116 // Mongolian with a disallowed character |
| 117 {"xn--95e5de3ds.cn", L"\x1823\x1837\x1804\x1833\x1824.cn", false}, |
| 118 // Miao/Pollard |
| 119 {"xn--2u0fpf0a.cn", L"\U00016f04\U00016f62\U00016f59.cn", true}, |
| 120 |
| 121 // Script mixing tests |
| 122 // The following script combinations are allowed. |
| 123 // MODERATELY_RESTRICTIVE with Latin limited to ASCII-Latin. |
| 124 // ASCII-Latin + Japn (Kana + Han) |
| 125 // ASCII-Latin + Kore (Hangul + Han) |
| 126 // ASCII-Latin + Han + Bopomofo |
| 127 // ASCII-Latin + any allowed script other than Cyrillic, Greek and Cherokee |
255 // "payp<alpha>l.com" | 128 // "payp<alpha>l.com" |
256 {"www.xn--paypl-g9d.com", L"payp\x03b1l.com", | 129 {"www.xn--paypl-g9d.com", L"payp\x03b1l.com", false}, |
257 {false, false, false, false, false, | |
258 false, false, false, false, false, | |
259 false, false, false, false, false, | |
260 false, false, false, false, false, | |
261 false}}, | |
262 // google.gr with Greek omicron and epsilon | 130 // google.gr with Greek omicron and epsilon |
263 {"xn--ggl-6xc1ca.gr", L"g\x03bf\x03bfgl\x03b5.gr", | 131 {"xn--ggl-6xc1ca.gr", L"g\x03bf\x03bfgl\x03b5.gr", false}, |
264 {false, false, false, false, false, | |
265 false, false, false, false, false, | |
266 false, false, false, false, false, | |
267 false, false, false, false, false, | |
268 false}}, | |
269 // google.ru with Cyrillic o | 132 // google.ru with Cyrillic o |
270 {"xn--ggl-tdd6ba.ru", L"g\x043e\x043egl\x0435.ru", | 133 {"xn--ggl-tdd6ba.ru", L"g\x043e\x043egl\x0435.ru", false}, |
271 {false, false, false, false, false, | |
272 false, false, false, false, false, | |
273 false, false, false, false, false, | |
274 false, false, false, false, false, | |
275 false}}, | |
276 // h<e with acute>llo<China in Han>.cn | 134 // h<e with acute>llo<China in Han>.cn |
277 {"xn--hllo-bpa7979ih5m.cn", L"h\x00e9llo\x4e2d\x56fd.cn", | 135 {"xn--hllo-bpa7979ih5m.cn", L"h\x00e9llo\x4e2d\x56fd.cn", false}, |
278 {false, false, false, false, false, | |
279 false, false, false, false, false, | |
280 false, false, false, false, false, | |
281 false, false, false, false, false, | |
282 false}}, | |
283 // <Greek rho><Cyrillic a><Cyrillic u>.ru | 136 // <Greek rho><Cyrillic a><Cyrillic u>.ru |
284 {"xn--2xa6t2b.ru", L"\x03c1\x0430\x0443.ru", | 137 {"xn--2xa6t2b.ru", L"\x03c1\x0430\x0443.ru", false}, |
285 {false, false, false, false, false, | 138 // Hangul + Latin |
286 false, false, false, false, false, | 139 {"xn--han-eb9ll88m.kr", L"\xd55c\xae00han.kr", true}, |
287 false, false, false, false, false, | 140 // Hangul + Latin + Han with IDN ccTLD |
288 false, false, false, false, false, | 141 {"xn--han-or0kq92gkm3c.xn--3e0b707e", |
289 false}}, | 142 L"\xd55c\xae00han\x97d3.\xd55c\xad6d", true}, |
| 143 // non-ASCII Latin + Hangul |
| 144 {"xn--caf-dma9024xvpg.kr", L"caf\x00e9\xce74\xd398.kr", false}, |
| 145 // Hangul + Hiragana |
| 146 {"xn--y9j3b9855e.kr", L"\xd55c\x3072\x3089.kr", false}, |
| 147 // <Hiragana>.<Hangul> is allowed because script mixing check is per label. |
| 148 {"xn--y9j3b.xn--3e0b707e", L"\x3072\x3089.\xd55c\xad6d", true}, |
| 149 // Traditional Han + Latin |
| 150 {"xn--hanzi-u57ii69i.tw", L"\x6f22\x5b57hanzi.tw", true}, |
| 151 // Simplified Han + Latin |
| 152 {"xn--hanzi-u57i952h.cn", L"\x6c49\x5b57hanzi.cn", true}, |
| 153 // Simplified Han + Traditional Han |
| 154 {"xn--hanzi-if9kt8n.cn", L"\x6c49\x6f22hanzi.cn", true}, |
| 155 // Han + Hiragana + Katakana + Latin |
| 156 {"xn--kanji-ii4dpizfq59yuykqr4b.jp", |
| 157 L"\x632f\x308a\x4eee\x540d\x30ab\x30bfkanji.jp", true}, |
| 158 // Han + Bopomofo |
| 159 {"xn--5ekcde0577e87tc.tw", L"\x6ce8\x97f3\x3105\x3106\x3107\x3108.tw", true}, |
| 160 // Han + Latin + Bopomofo |
| 161 {"xn--bopo-ty4cghi8509kk7xd.tw", |
| 162 L"\x6ce8\x97f3" L"bopo\x3105\x3106\x3107\x3108.tw", true}, |
| 163 // Latin + Bopomofo |
| 164 {"xn--bopomofo-hj5gkalm.tw", L"bopomofo\x3105\x3106\x3107\x3108.tw", true}, |
| 165 // Bopomofo + Katakana |
| 166 {"xn--lcka3d1bztghi.tw", |
| 167 L"\x3105\x3106\x3107\x3108\x30ab\x30bf\x30ab\x30ca.tw", false}, |
| 168 // Bopomofo + Hangul |
| 169 {"xn--5ekcde4543qbec.tw", L"\x3105\x3106\x3107\x3108\xc8fc\xc74c.tw", false}, |
| 170 // Devanagari + Latin |
| 171 {"xn--ab-3ofh8fqbj6h.in", L"ab\x0939\x093f\x0928\x094d\x0926\x0940.in", true}, |
| 172 // Thai + Latin |
| 173 {"xn--ab-jsi9al4bxdb6n.th", |
| 174 L"ab\x0e20\x0e32\x0e29\x0e32\x0e44\x0e17\x0e22.th", true}, |
| 175 // <vitamin in Katakana>b1.com |
| 176 {"xn--b1-xi4a7cvc9f.com", L"\x30d3\x30bf\x30df\x30f3" L"b1.com", true}, |
| 177 // Devanagari + Han |
| 178 {"xn--t2bes3ds6749n.com", L"\x0930\x094b\x0932\x0947\x76e7\x0938.com", false}, |
| 179 // Devanagari + Bengali |
| 180 {"xn--11b0x.in", L"\x0915\x0995.in", false}, |
| 181 |
| 182 // Invisibility check |
| 183 // Thai tone mark malek(U+0E48) repeated |
| 184 {"xn--03c0b3ca.th", L"\x0e23\x0e35\x0e48\x0e48.th", false}, |
| 185 // Acute accent repeated |
| 186 {"xn--a-xbba.com", L"a\x0301\x0301.com", false}, |
| 187 // 'a' with acute accent + another acute accent |
| 188 {"xn--1ca20i.com", L"\x00e1\x0301.com", false}, |
| 189 |
| 190 // Mixed script confusable |
| 191 // google with Armenian Small Letter Oh(U+0585) |
| 192 {"xn--gogle-lkg.com", L"g\x0585ogle.com", false}, |
| 193 // Hiragana HE(U+3078) mixed with Katakana |
| 194 {"xn--49jxi3as0d0fpc.com", |
| 195 L"\x30e2\x30d2\x30fc\x30c8\x3078\x30d6\x30f3.com", false}, |
| 196 // U+30FC + Han |
| 197 {"xn--weka801xo02a.com", L"\x30fc\x52d5\x753b\x30fc.com", false}, |
| 198 // Han + U+30FC + Han |
| 199 {"xn--wekz60nb2ay85atj0b.jp", L"\x65e5\x672c\x30fc\x91ce\x7403.jp", false}, |
| 200 // Latin + U+30FC + Latin |
| 201 {"xn--abcdef-r64e.jp", L"abc\x30fc" L"def.jp", false}, |
| 202 // Latin + U+30FB + Latin |
| 203 {"xn--abcdef-k64e.jp", L"abc\x30fb" L"def.jp", false}, |
| 204 // U+30FB + Latin |
| 205 {"xn--abc-os4b.jp", L"\x30fb" L"abc.jp", false}, |
| 206 |
| 207 // Mixed digits: the first two will also fail mixed script test |
| 208 // Latin + ASCII digit + Deva digit |
| 209 {"xn--asc1deva-j0q.co.in", L"asc1deva\x0967.co.in", false}, |
| 210 // Latin + Deva digit + Beng digit |
| 211 {"xn--devabeng-f0qu3f.co.in", L"deva\x0967" L"beng\x09e7.co.in", false}, |
| 212 // ASCII digit + Deva digit |
| 213 {"xn--79-v5f.co.in", L"7\x09ea" L"9.co.in", false}, |
| 214 // Deva digit + Beng digit |
| 215 {"xn--e4b0x.co.in", L"\x0967\x09e7.co.in", false}, |
| 216 // U+4E00 (CJK Ideograph One) is not a digit |
| 217 {"xn--d12-s18d.cn", L"d12\x4e00.cn", true}, |
290 // One that's really long that will force a buffer realloc | 218 // One that's really long that will force a buffer realloc |
291 {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" | 219 {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" |
292 "aaaaaaa", | 220 "aaaaaaa", |
293 L"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" | 221 L"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" |
294 L"aaaaaaaa", | 222 L"aaaaaaaa", |
295 {true, true, true, true, true, | 223 true}, |
296 true, true, true, true, true, | 224 |
297 true, true, true, true, true, | 225 // Not allowed; characters outside [:Identifier_Status=Allowed:] |
298 true, true, true, true, true, | 226 // Limited Use Scripts: UTS 31 Table 7. |
299 true}}, | 227 // Vai |
300 // Test cases for characters we blacklisted although allowed in IDN. | 228 {"xn--sn8a.com", L"\xa50b.com", false}, |
301 // Embedded spaces will be turned to %20 in the display. | 229 // 'CARD' look-alike in Cherokee |
302 // TODO(jungshik): We need to have more cases. This is a typical | 230 {"xn--58db0a9q.com", L"\x13df\x13aa\x13a1\x13a0.com", false}, |
303 // data-driven trap. The following test cases need to be separated | 231 // Scripts excluded from Identifiers: UTS 31 Table 4 |
304 // and tested only for a couple of languages. | 232 // Coptic |
305 {"xn--osd3820f24c.kr", L"\xac00\xb098\x115f.kr", | 233 {"xn--5ya.com", L"\x03e7.com", false}, |
306 {false, false, false, false, false, | 234 // Old Italic |
307 false, false, false, false, false, | 235 {"xn--097cc.com", L"\U00010300\U00010301.com", false}, |
308 false, false, false, false, false, | 236 |
309 false, false, false, false, false, | 237 // U+115F (Hangul Filler) |
310 false}}, | 238 {"xn--osd3820f24c.kr", L"\xac00\xb098\x115f.kr", false}, |
311 {"www.xn--google-ho0coa.com", L"www.\x2039google\x203a.com", | 239 {"www.xn--google-ho0coa.com", L"www.\x2039google\x203a.com", false}, |
312 {false, false, false, false, false, | 240 // Latin small capital w |
313 false, false, false, false, false, | 241 {"xn--hardare-l41c.com", L"hard\x1d21" L"are.com", false}, |
314 false, false, false, false, false, | 242 // Minus Sign(U+2212) |
315 false, false, false, false, false, | 243 {"xn--t9g238xc2a.jp", L"\x65e5\x2212\x672c.jp", false}, |
316 }}, | 244 // Latin Small Letter Script G |
317 {"google.xn--comabc-k8d", L"google.com\x0338" L"abc", | 245 {"xn--0naa.com", L"\x0261\x0261.com", false}, |
318 {false, false, false, false, false, | 246 // Hangul Jamo(U+11xx) |
319 false, false, false, false, false, | 247 {"xn--0pdc3b.com", L"\x1102\x1103\x1110.com", false}, |
320 false, false, false, false, false, | 248 // degree sign |
321 false, false, false, false, false, | 249 {"xn--36c-tfa.com", L"36\x00b0" L"c.com", false}, |
322 }}, | 250 // Diaeresis (U+00A8) |
323 {"google.xn--com-oh4ba.evil.jp", L"google.com\x309a\x309a.evil.jp", | 251 {"xn--5free-9ga.com", L"5free\x00a8.com", false}, |
324 {false, false, false, false, false, | 252 // Hebrew points (U+05B0, U+05B6) |
325 false, false, false, false, false, | 253 {"xn--7cbl2kc2a.com", L"\x05e1\x05b6\x05e7\x05b0\x05e1.com", false}, |
326 false, false, false, false, false, | 254 // Danda(U+0964) |
327 false, false, false, false, false, | 255 {"xn--81bp1b6ch8s.com", L"\x0924\x093f\x091c\x0964\x0930\x0940.com", false}, |
328 }}, | 256 // Small letter script G(U+0261) |
329 {"google.xn--comevil-v04f.jp", L"google.com\x30ce" L"evil.jp", | 257 {"xn--oogle-qmc.com", L"\x0261oogle.com", false}, |
330 {false, false, false, false, false, | 258 // Small Katakana Extension(U+31F1) |
331 false, false, false, false, false, | 259 {"xn--wlk.com", L"\x31f1.com", false}, |
332 false, false, false, false, false, | 260 // Heart symbol |
333 false, false, false, false, false, | 261 {"xn--ab-u0x.com", L"ab\x2665.com", false}, |
334 }}, | 262 // Emoji |
| 263 {"xn--vi8hiv.xyz", L"\U0001f355\U0001f4a9.xyz", false}, |
| 264 // Registered trade mark |
| 265 {"xn--egistered-fna.com", L"\x00ae" L"egistered.com", false}, |
| 266 // Latin Letter Retroflex Click |
| 267 {"xn--registered-25c.com", L"registered\x01c3.com", false}, |
| 268 // ASCII '!' not allowed in IDN |
| 269 {"xn--!-257eu42c.kr", L"\xc548\xb155!.kr", false}, |
| 270 // 'GOOGLE' in IPA extension |
| 271 {"xn--1naa7pn51hcbaa.com", |
| 272 L"\x0262\x1d0f\x1d0f\x0262\x029f\x1d07.com", false}, |
335 // Padlock icon spoof. | 273 // Padlock icon spoof. |
336 {"xn--google-hj64e", L"\U0001f512google.com", | 274 {"xn--google-hj64e", L"\U0001f512google.com", false}, |
337 {false, false, false, false, false, | 275 |
338 false, false, false, false, false, | 276 // Custom black list |
339 false, false, false, false, false, | 277 // Combining Long Solidus Overlay |
340 false, false, false, false, false, | 278 {"google.xn--comabc-k8d", L"google.com\x0338" L"abc", false}, |
341 }}, | 279 // Hyphenation Point instead of Katakana Middle dot |
342 // Ensure that blacklisting "\xd83d\xdd12" did not inadvertently blacklist | 280 {"xn--svgy16dha.jp", L"\x30a1\x2027\x30a3.jp", false}, |
343 // all strings with the surrogate '\xdd12'. | 281 // Gershayim with other Hebrew characters is allowed. |
344 {"xn--fk9c.com", L"\U00010912.com", | 282 {"xn--5db6bh9b.il", L"\x05e9\x05d1\x05f4\x05e6.il", true}, |
345 {true, false, false, false, false, | 283 // Hebrew Gershayim with Latin is disallowed. |
346 false, false, false, false, false, | 284 {"xn--ab-yod.com", L"a\x05f4" L"b.com", false}, |
347 false, false, false, false, false, | 285 // Hebrew Gershayim with Arabic is disallowed. |
348 false, false, false, false, false, | 286 {"xn--5eb7h.eg", L"\x0628\x05f4.eg", false}, |
349 }}, | 287 |
350 #if 0 | 288 // Custom dangerous patterns |
351 // These two cases are special. We need a separate test. | 289 // Two Katakana-Hiragana combining mark in a row |
352 // U+3000 and U+3002 are normalized to ASCII space and dot. | 290 {"google.xn--com-oh4ba.evil.jp", L"google.com\x309a\x309a.evil.jp", false}, |
353 {"xn-- -kq6ay5z.cn", L"\x4e2d\x56fd\x3000.cn", | 291 // Katakana Letter No not enclosed by {Han,Hiragana,Katakana}. |
354 {false, false, true, false, false, | 292 {"google.xn--comevil-v04f.jp", L"google.com\x30ce" L"evil.jp", false}, |
355 false, false, false, false, false, | 293 // TODO(jshin): Review the danger of allowing the following two. |
356 false, false, false, false, false, | 294 // Katakana 'No' by itself is allowed. |
357 false, false, true, false, false, | 295 {"xn--ldk.jp", L"\x30ce.jp", true}, |
358 true}}, | 296 // Hebrew Gershayim used by itself is allowed. |
359 {"xn--fiqs8s.cn", L"\x4e2d\x56fd\x3002" L"cn", | 297 {"xn--5eb.il", L"\x05f4.il", true}, |
360 {false, false, true, false, false, | 298 |
361 false, false, false, false, false, | 299 // 4 Deviation characters between IDNA 2003 and IDNA 2008 |
362 false, false, false, false, false, | 300 // When entered in Unicode, the first two are mapped to 'ss' and Greek sigma |
363 false, false, true, false, false, | 301 // and the latter two are mapped away. However, the punycode form should |
364 true}}, | 302 // remain in punycode. |
365 #endif | 303 // U+00DF(sharp-s) |
| 304 {"xn--fu-hia.de", L"fu\x00df.de", false}, |
| 305 // U+03C2(final-sigma) |
| 306 {"xn--mxac2c.gr", L"\x03b1\x03b2\x03c2.gr", false}, |
| 307 // U+200C(ZWNJ) |
| 308 {"xn--h2by8byc123p.in", L"\x0924\x094d\x200c\x0930\x093f.in", false}, |
| 309 // U+200D(ZWJ) |
| 310 {"xn--11b6iy14e.in", L"\x0915\x094d\x200d.in", false}, |
| 311 |
| 312 // Math Monospace Small A. When entered in Unicode, it's canonicalized to |
| 313 // 'a'. The punycode form should remain in punycode. |
| 314 {"xn--bc-9x80a.xyz", L"\U0001d68a" L"bc.xyz", false}, |
| 315 // Math Sans Bold Capital Alpha |
| 316 {"xn--bc-rg90a.xyz", L"\U0001d756" L"bc.xyz", false}, |
| 317 // U+3000 is canonicalized to a space(U+0020), but the punycode form |
| 318 // should remain in punycode. |
| 319 {"xn--p6j412gn7f.cn", L"\x4e2d\x56fd\x3000", false}, |
| 320 // U+3002 is canonicalized to ASCII fullstop(U+002E), but the punycode form |
| 321 // should remain in punycode. |
| 322 {"xn--r6j012gn7f.cn", L"\x4e2d\x56fd\x3002", false}, |
| 323 // Invalid punycode |
| 324 // Has a codepoint beyond U+10FFFF. |
| 325 {"xn--krank-kg706554a", nullptr, false}, |
| 326 // '?' in punycode. |
| 327 {"xn--hello?world.com", nullptr, false}, |
| 328 |
| 329 // Not allowed in UTS46/IDNA 2008 |
| 330 // Georgian Capital Letter(U+10BD) |
| 331 {"xn--1nd.com", L"\x10bd.com", false}, |
| 332 // 3rd and 4th characters are '-'. |
| 333 {"xn-----8kci4dhsd", L"\x0440\x0443--\x0430\x0432\x0442\x043e", false}, |
| 334 // Leading combining mark |
| 335 {"xn--72b.com", L"\x093e.com", false}, |
| 336 // BiDi check per IDNA 2008/UTS 46 |
| 337 // Cannot start with AN(Arabic-Indic Number) |
| 338 {"xn--8hbae.eg", L"\x0662\x0660\x0660.eg", false}, |
| 339 // Cannot start with a RTL character and ends with a LTR |
| 340 {"xn--x-ymcov.eg", L"\x062c\x0627\x0631x.eg", false}, |
| 341 // Can start with a RTL character and ends with EN(European Number) |
| 342 {"xn--2-ymcov.eg", L"\x062c\x0627\x0631" L"2.eg", true}, |
| 343 // Can start with a RTL and end with AN |
| 344 {"xn--mgbjq0r.eg", L"\x062c\x0627\x0631\x0662.eg", true}, |
366 }; | 345 }; |
367 | 346 |
368 struct AdjustOffsetCase { | 347 struct AdjustOffsetCase { |
369 size_t input_offset; | 348 size_t input_offset; |
370 size_t output_offset; | 349 size_t output_offset; |
371 }; | 350 }; |
372 | 351 |
373 struct UrlTestData { | 352 struct UrlTestData { |
374 const char* const description; | 353 const char* const description; |
375 const char* const input; | 354 const char* const input; |
376 const char* const languages; | |
377 FormatUrlTypes format_types; | 355 FormatUrlTypes format_types; |
378 net::UnescapeRule::Type escape_rules; | 356 net::UnescapeRule::Type escape_rules; |
379 const wchar_t* output; // Use |wchar_t| to handle Unicode constants easily. | 357 const wchar_t* output; // Use |wchar_t| to handle Unicode constants easily. |
380 size_t prefix_len; | 358 size_t prefix_len; |
381 }; | 359 }; |
382 | 360 |
383 // A helper for IDN*{Fast,Slow}. | |
384 // Append "::<language list>" to |expected| and |actual| to make it | |
385 // easy to tell which sub-case fails without debugging. | |
386 void AppendLanguagesToOutputs(const char* languages, | |
387 base::string16* expected, | |
388 base::string16* actual) { | |
389 base::string16 to_append = ASCIIToUTF16("::") + ASCIIToUTF16(languages); | |
390 expected->append(to_append); | |
391 actual->append(to_append); | |
392 } | |
393 | |
394 // A pair of helpers for the FormatUrlWithOffsets() test. | 361 // A pair of helpers for the FormatUrlWithOffsets() test. |
395 void VerboseExpect(size_t expected, | 362 void VerboseExpect(size_t expected, |
396 size_t actual, | 363 size_t actual, |
397 const std::string& original_url, | 364 const std::string& original_url, |
398 size_t position, | 365 size_t position, |
399 const base::string16& formatted_url) { | 366 const base::string16& formatted_url) { |
400 EXPECT_EQ(expected, actual) << "Original URL: " << original_url | 367 EXPECT_EQ(expected, actual) << "Original URL: " << original_url |
401 << " (at char " << position << ")\nFormatted URL: " << formatted_url; | 368 << " (at char " << position << ")\nFormatted URL: " << formatted_url; |
402 } | 369 } |
403 | 370 |
404 void CheckAdjustedOffsets(const std::string& url_string, | 371 void CheckAdjustedOffsets(const std::string& url_string, |
405 const std::string& languages, | |
406 FormatUrlTypes format_types, | 372 FormatUrlTypes format_types, |
407 net::UnescapeRule::Type unescape_rules, | 373 net::UnescapeRule::Type unescape_rules, |
408 const size_t* output_offsets) { | 374 const size_t* output_offsets) { |
409 GURL url(url_string); | 375 GURL url(url_string); |
410 size_t url_length = url_string.length(); | 376 size_t url_length = url_string.length(); |
411 std::vector<size_t> offsets; | 377 std::vector<size_t> offsets; |
412 for (size_t i = 0; i <= url_length + 1; ++i) | 378 for (size_t i = 0; i <= url_length + 1; ++i) |
413 offsets.push_back(i); | 379 offsets.push_back(i); |
414 offsets.push_back(500000); // Something larger than any input length. | 380 offsets.push_back(500000); // Something larger than any input length. |
415 offsets.push_back(std::string::npos); | 381 offsets.push_back(std::string::npos); |
416 base::string16 formatted_url = FormatUrlWithOffsets(url, languages, | 382 base::string16 formatted_url = FormatUrlWithOffsets(url, std::string(), |
417 format_types, unescape_rules, NULL, NULL, &offsets); | 383 format_types, unescape_rules, NULL, NULL, &offsets); |
418 for (size_t i = 0; i < url_length; ++i) | 384 for (size_t i = 0; i < url_length; ++i) |
419 VerboseExpect(output_offsets[i], offsets[i], url_string, i, formatted_url); | 385 VerboseExpect(output_offsets[i], offsets[i], url_string, i, formatted_url); |
420 VerboseExpect(formatted_url.length(), offsets[url_length], url_string, | 386 VerboseExpect(formatted_url.length(), offsets[url_length], url_string, |
421 url_length, formatted_url); | 387 url_length, formatted_url); |
422 VerboseExpect(base::string16::npos, offsets[url_length + 1], url_string, | 388 VerboseExpect(base::string16::npos, offsets[url_length + 1], url_string, |
423 500000, formatted_url); | 389 500000, formatted_url); |
424 VerboseExpect(base::string16::npos, offsets[url_length + 2], url_string, | 390 VerboseExpect(base::string16::npos, offsets[url_length + 2], url_string, |
425 std::string::npos, formatted_url); | 391 std::string::npos, formatted_url); |
426 } | 392 } |
427 | 393 |
428 TEST(UrlFormatterTest, IDNToUnicodeFast) { | 394 TEST(UrlFormatterTest, IDNToUnicode) { |
429 for (size_t i = 0; i < arraysize(idn_cases); i++) { | 395 for (size_t i = 0; i < arraysize(idn_cases); i++) { |
430 for (size_t j = 0; j < arraysize(kLanguages); j++) { | 396 base::string16 output(IDNToUnicode(idn_cases[i].input, std::string())); |
431 // ja || zh-TW,en || ko,ja -> IDNToUnicodeSlow | 397 base::string16 expected(idn_cases[i].unicode_allowed |
432 if (j == 3 || j == 17 || j == 18) | 398 ? WideToUTF16(idn_cases[i].unicode_output) |
433 continue; | 399 : ASCIIToUTF16(idn_cases[i].input)); |
434 base::string16 output(IDNToUnicode(idn_cases[i].input, kLanguages[j])); | 400 EXPECT_EQ(expected, output) << "input # " << i << ": \"" |
435 base::string16 expected(idn_cases[i].unicode_allowed[j] ? | 401 << idn_cases[i].input << "\""; |
436 WideToUTF16(idn_cases[i].unicode_output) : | |
437 ASCIIToUTF16(idn_cases[i].input)); | |
438 AppendLanguagesToOutputs(kLanguages[j], &expected, &output); | |
439 EXPECT_EQ(expected, output) << "input: \"" << idn_cases[i].input | |
440 << "\", languages: \"" << kLanguages[j] | |
441 << "\""; | |
442 } | |
443 } | |
444 } | |
445 | |
446 TEST(UrlFormatterTest, IDNToUnicodeSlow) { | |
447 for (size_t i = 0; i < arraysize(idn_cases); i++) { | |
448 for (size_t j = 0; j < arraysize(kLanguages); j++) { | |
449 // !(ja || zh-TW,en || ko,ja) -> IDNToUnicodeFast | |
450 if (!(j == 3 || j == 17 || j == 18)) | |
451 continue; | |
452 base::string16 output(IDNToUnicode(idn_cases[i].input, kLanguages[j])); | |
453 base::string16 expected(idn_cases[i].unicode_allowed[j] ? | |
454 WideToUTF16(idn_cases[i].unicode_output) : | |
455 ASCIIToUTF16(idn_cases[i].input)); | |
456 AppendLanguagesToOutputs(kLanguages[j], &expected, &output); | |
457 EXPECT_EQ(expected, output) << "input: \"" << idn_cases[i].input | |
458 << "\", languages: \"" << kLanguages[j] | |
459 << "\""; | |
460 } | |
461 } | |
462 } | |
463 | |
464 // ulocdata_getExemplarSet may fail with some locales (currently bn, gu, and | |
465 // te), which was causing a crash (See http://crbug.com/510551). This may be an | |
466 // icu bug, but regardless, that should not cause a crash. | |
467 TEST(UrlFormatterTest, IDNToUnicodeNeverCrashes) { | |
468 for (char c1 = 'a'; c1 <= 'z'; c1++) { | |
469 for (char c2 = 'a'; c2 <= 'z'; c2++) { | |
470 std::string lang = base::StringPrintf("%c%c", c1, c2); | |
471 base::string16 output(IDNToUnicode("xn--74h", lang)); | |
472 } | |
473 } | 402 } |
474 } | 403 } |
475 | 404 |
476 TEST(UrlFormatterTest, FormatUrl) { | 405 TEST(UrlFormatterTest, FormatUrl) { |
477 FormatUrlTypes default_format_type = kFormatUrlOmitUsernamePassword; | 406 FormatUrlTypes default_format_type = kFormatUrlOmitUsernamePassword; |
478 const UrlTestData tests[] = { | 407 const UrlTestData tests[] = { |
479 {"Empty URL", "", "", default_format_type, net::UnescapeRule::NORMAL, L"", | 408 {"Empty URL", "", default_format_type, net::UnescapeRule::NORMAL, L"", |
480 0}, | 409 0}, |
481 | 410 |
482 {"Simple URL", "http://www.google.com/", "", default_format_type, | 411 {"Simple URL", "http://www.google.com/", default_format_type, |
483 net::UnescapeRule::NORMAL, L"http://www.google.com/", 7}, | 412 net::UnescapeRule::NORMAL, L"http://www.google.com/", 7}, |
484 | 413 |
485 {"With a port number and a reference", | 414 {"With a port number and a reference", |
486 "http://www.google.com:8080/#\xE3\x82\xB0", "", default_format_type, | 415 "http://www.google.com:8080/#\xE3\x82\xB0", default_format_type, |
487 net::UnescapeRule::NORMAL, L"http://www.google.com:8080/#\x30B0", 7}, | 416 net::UnescapeRule::NORMAL, L"http://www.google.com:8080/#\x30B0", 7}, |
488 | 417 |
489 // -------- IDN tests -------- | 418 // -------- IDN tests -------- |
490 {"Japanese IDN with ja", "http://xn--l8jvb1ey91xtjb.jp", "ja", | 419 {"Japanese IDN with ja", "http://xn--l8jvb1ey91xtjb.jp", |
491 default_format_type, net::UnescapeRule::NORMAL, | 420 default_format_type, net::UnescapeRule::NORMAL, |
492 L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7}, | 421 L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7}, |
493 | 422 |
494 {"Japanese IDN with en", "http://xn--l8jvb1ey91xtjb.jp", "en", | 423 {"mailto: with Japanese IDN", "mailto:foo@xn--l8jvb1ey91xtjb.jp", |
495 default_format_type, net::UnescapeRule::NORMAL, | |
496 L"http://xn--l8jvb1ey91xtjb.jp/", 7}, | |
497 | |
498 {"Japanese IDN without any languages", "http://xn--l8jvb1ey91xtjb.jp", "", | |
499 default_format_type, net::UnescapeRule::NORMAL, | |
500 // Single script is safe for empty languages. | |
501 L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7}, | |
502 | |
503 {"mailto: with Japanese IDN", "mailto:foo@xn--l8jvb1ey91xtjb.jp", "ja", | |
504 default_format_type, net::UnescapeRule::NORMAL, | 424 default_format_type, net::UnescapeRule::NORMAL, |
505 // GURL doesn't assume an email address's domain part as a host name. | 425 // GURL doesn't assume an email address's domain part as a host name. |
506 L"mailto:foo@xn--l8jvb1ey91xtjb.jp", 7}, | 426 L"mailto:foo@xn--l8jvb1ey91xtjb.jp", 7}, |
507 | 427 |
508 {"file: with Japanese IDN", "file://xn--l8jvb1ey91xtjb.jp/config.sys", | 428 {"file: with Japanese IDN", "file://xn--l8jvb1ey91xtjb.jp/config.sys", |
509 "ja", default_format_type, net::UnescapeRule::NORMAL, | 429 default_format_type, net::UnescapeRule::NORMAL, |
510 L"file://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 7}, | 430 L"file://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 7}, |
511 | 431 |
512 {"ftp: with Japanese IDN", "ftp://xn--l8jvb1ey91xtjb.jp/config.sys", "ja", | 432 {"ftp: with Japanese IDN", "ftp://xn--l8jvb1ey91xtjb.jp/config.sys", |
513 default_format_type, net::UnescapeRule::NORMAL, | 433 default_format_type, net::UnescapeRule::NORMAL, |
514 L"ftp://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 6}, | 434 L"ftp://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 6}, |
515 | 435 |
516 // -------- omit_username_password flag tests -------- | 436 // -------- omit_username_password flag tests -------- |
517 {"With username and password, omit_username_password=false", | 437 {"With username and password, omit_username_password=false", |
518 "http://user:passwd@example.com/foo", "", kFormatUrlOmitNothing, | 438 "http://user:passwd@example.com/foo", kFormatUrlOmitNothing, |
519 net::UnescapeRule::NORMAL, L"http://user:passwd@example.com/foo", 19}, | 439 net::UnescapeRule::NORMAL, L"http://user:passwd@example.com/foo", 19}, |
520 | 440 |
521 {"With username and password, omit_username_password=true", | 441 {"With username and password, omit_username_password=true", |
522 "http://user:passwd@example.com/foo", "", default_format_type, | 442 "http://user:passwd@example.com/foo", default_format_type, |
523 net::UnescapeRule::NORMAL, L"http://example.com/foo", 7}, | 443 net::UnescapeRule::NORMAL, L"http://example.com/foo", 7}, |
524 | 444 |
525 {"With username and no password", "http://user@example.com/foo", "", | 445 {"With username and no password", "http://user@example.com/foo", |
526 default_format_type, net::UnescapeRule::NORMAL, | 446 default_format_type, net::UnescapeRule::NORMAL, |
527 L"http://example.com/foo", 7}, | 447 L"http://example.com/foo", 7}, |
528 | 448 |
529 {"Just '@' without username and password", "http://@example.com/foo", "", | 449 {"Just '@' without username and password", "http://@example.com/foo", |
530 default_format_type, net::UnescapeRule::NORMAL, | 450 default_format_type, net::UnescapeRule::NORMAL, |
531 L"http://example.com/foo", 7}, | 451 L"http://example.com/foo", 7}, |
532 | 452 |
533 // GURL doesn't think local-part of an email address is username for URL. | 453 // GURL doesn't think local-part of an email address is username for URL. |
534 {"mailto:, omit_username_password=true", "mailto:foo@example.com", "", | 454 {"mailto:, omit_username_password=true", "mailto:foo@example.com", |
535 default_format_type, net::UnescapeRule::NORMAL, | 455 default_format_type, net::UnescapeRule::NORMAL, |
536 L"mailto:foo@example.com", 7}, | 456 L"mailto:foo@example.com", 7}, |
537 | 457 |
538 // -------- unescape flag tests -------- | 458 // -------- unescape flag tests -------- |
539 {"Do not unescape", | 459 {"Do not unescape", |
540 "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/" | 460 "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/" |
541 "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB" | 461 "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB" |
542 "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", | 462 "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", |
543 "en", default_format_type, net::UnescapeRule::NONE, | 463 default_format_type, net::UnescapeRule::NONE, |
544 // GURL parses %-encoded hostnames into Punycode. | 464 // GURL parses %-encoded hostnames into Punycode. |
545 L"http://xn--qcka1pmc.jp/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB" | 465 L"http://\x30B0\x30FC\x30B0\x30EB.jp/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB
" |
546 L"?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", | 466 L"?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", |
547 7}, | 467 7}, |
548 | 468 |
549 {"Unescape normally", | 469 {"Unescape normally", |
550 "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/" | 470 "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/" |
551 "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB" | 471 "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB" |
552 "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", | 472 "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", |
553 "en", default_format_type, net::UnescapeRule::NORMAL, | 473 default_format_type, net::UnescapeRule::NORMAL, |
554 L"http://xn--qcka1pmc.jp/\x30B0\x30FC\x30B0\x30EB" | 474 L"http://\x30B0\x30FC\x30B0\x30EB.jp/\x30B0\x30FC\x30B0\x30EB" |
555 L"?q=\x30B0\x30FC\x30B0\x30EB", | 475 L"?q=\x30B0\x30FC\x30B0\x30EB", |
556 7}, | 476 7}, |
557 | 477 |
558 {"Unescape normally with BiDi control character", | 478 {"Unescape normally with BiDi control character", |
559 "http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy", "en", | 479 "http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy", |
560 default_format_type, net::UnescapeRule::NORMAL, | 480 default_format_type, net::UnescapeRule::NORMAL, |
561 L"http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy", 7}, | 481 L"http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy", 7}, |
562 | 482 |
563 {"Unescape normally including unescape spaces", | 483 {"Unescape normally including unescape spaces", |
564 "http://www.google.com/search?q=Hello%20World", "en", | 484 "http://www.google.com/search?q=Hello%20World", |
565 default_format_type, net::UnescapeRule::SPACES, | 485 default_format_type, net::UnescapeRule::SPACES, |
566 L"http://www.google.com/search?q=Hello World", 7}, | 486 L"http://www.google.com/search?q=Hello World", 7}, |
567 | 487 |
568 /* | 488 /* |
569 {"unescape=true with some special characters", | 489 {"unescape=true with some special characters", |
570 "http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", "", | 490 "http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", |
571 kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, | 491 kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, |
572 L"http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", 25}, | 492 L"http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", 25}, |
573 */ | 493 */ |
574 // Disabled: the resultant URL becomes "...user%253A:%2540passwd...". | 494 // Disabled: the resultant URL becomes "...user%253A:%2540passwd...". |
575 | 495 |
576 // -------- omit http: -------- | 496 // -------- omit http: -------- |
577 {"omit http with user name", "http://user@example.com/foo", "", | 497 {"omit http with user name", "http://user@example.com/foo", |
578 kFormatUrlOmitAll, net::UnescapeRule::NORMAL, L"example.com/foo", 0}, | 498 kFormatUrlOmitAll, net::UnescapeRule::NORMAL, L"example.com/foo", 0}, |
579 | 499 |
580 {"omit http", "http://www.google.com/", "en", kFormatUrlOmitHTTP, | 500 {"omit http", "http://www.google.com/", kFormatUrlOmitHTTP, |
581 net::UnescapeRule::NORMAL, L"www.google.com/", 0}, | 501 net::UnescapeRule::NORMAL, L"www.google.com/", 0}, |
582 | 502 |
583 {"omit http with https", "https://www.google.com/", "en", | 503 {"omit http with https", "https://www.google.com/", |
584 kFormatUrlOmitHTTP, net::UnescapeRule::NORMAL, | 504 kFormatUrlOmitHTTP, net::UnescapeRule::NORMAL, |
585 L"https://www.google.com/", 8}, | 505 L"https://www.google.com/", 8}, |
586 | 506 |
587 {"omit http starts with ftp.", "http://ftp.google.com/", "en", | 507 {"omit http starts with ftp.", "http://ftp.google.com/", |
588 kFormatUrlOmitHTTP, net::UnescapeRule::NORMAL, L"http://ftp.google.com/", | 508 kFormatUrlOmitHTTP, net::UnescapeRule::NORMAL, L"http://ftp.google.com/", |
589 7}, | 509 7}, |
590 | 510 |
591 // -------- omit trailing slash on bare hostname -------- | 511 // -------- omit trailing slash on bare hostname -------- |
592 {"omit slash when it's the entire path", "http://www.google.com/", "en", | 512 {"omit slash when it's the entire path", "http://www.google.com/", |
593 kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL, | 513 kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL, |
594 L"http://www.google.com", 7}, | 514 L"http://www.google.com", 7}, |
595 {"omit slash when there's a ref", "http://www.google.com/#ref", "en", | 515 {"omit slash when there's a ref", "http://www.google.com/#ref", |
596 kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL, | 516 kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL, |
597 L"http://www.google.com/#ref", 7}, | 517 L"http://www.google.com/#ref", 7}, |
598 {"omit slash when there's a query", "http://www.google.com/?", "en", | 518 {"omit slash when there's a query", "http://www.google.com/?", |
599 kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL, | 519 kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL, |
600 L"http://www.google.com/?", 7}, | 520 L"http://www.google.com/?", 7}, |
601 {"omit slash when it's not the entire path", "http://www.google.com/foo", | 521 {"omit slash when it's not the entire path", "http://www.google.com/foo", |
602 "en", kFormatUrlOmitTrailingSlashOnBareHostname, | 522 kFormatUrlOmitTrailingSlashOnBareHostname, |
603 net::UnescapeRule::NORMAL, L"http://www.google.com/foo", 7}, | 523 net::UnescapeRule::NORMAL, L"http://www.google.com/foo", 7}, |
604 {"omit slash for nonstandard URLs", "data:/", "en", | 524 {"omit slash for nonstandard URLs", "data:/", |
605 kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL, | 525 kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL, |
606 L"data:/", 5}, | 526 L"data:/", 5}, |
607 {"omit slash for file URLs", "file:///", "en", | 527 {"omit slash for file URLs", "file:///", |
608 kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL, | 528 kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL, |
609 L"file:///", 7}, | 529 L"file:///", 7}, |
610 | 530 |
611 // -------- view-source: -------- | 531 // -------- view-source: -------- |
612 {"view-source", "view-source:http://xn--qcka1pmc.jp/", "ja", | 532 {"view-source", "view-source:http://xn--qcka1pmc.jp/", |
613 default_format_type, net::UnescapeRule::NORMAL, | 533 default_format_type, net::UnescapeRule::NORMAL, |
614 L"view-source:http://\x30B0\x30FC\x30B0\x30EB.jp/", 19}, | 534 L"view-source:http://\x30B0\x30FC\x30B0\x30EB.jp/", 19}, |
615 | 535 |
616 {"view-source of view-source", | 536 {"view-source of view-source", |
617 "view-source:view-source:http://xn--qcka1pmc.jp/", "ja", | 537 "view-source:view-source:http://xn--qcka1pmc.jp/", |
618 default_format_type, net::UnescapeRule::NORMAL, | 538 default_format_type, net::UnescapeRule::NORMAL, |
619 L"view-source:view-source:http://xn--qcka1pmc.jp/", 12}, | 539 L"view-source:view-source:http://xn--qcka1pmc.jp/", 12}, |
620 | 540 |
621 // view-source should omit http and trailing slash where non-view-source | 541 // view-source should omit http and trailing slash where non-view-source |
622 // would. | 542 // would. |
623 {"view-source omit http", "view-source:http://a.b/c", "en", | 543 {"view-source omit http", "view-source:http://a.b/c", |
624 kFormatUrlOmitAll, net::UnescapeRule::NORMAL, L"view-source:a.b/c", 12}, | 544 kFormatUrlOmitAll, net::UnescapeRule::NORMAL, L"view-source:a.b/c", 12}, |
625 {"view-source omit http starts with ftp.", "view-source:http://ftp.b/c", | 545 {"view-source omit http starts with ftp.", "view-source:http://ftp.b/c", |
626 "en", kFormatUrlOmitAll, net::UnescapeRule::NORMAL, | 546 kFormatUrlOmitAll, net::UnescapeRule::NORMAL, |
627 L"view-source:http://ftp.b/c", 19}, | 547 L"view-source:http://ftp.b/c", 19}, |
628 {"view-source omit slash when it's the entire path", | 548 {"view-source omit slash when it's the entire path", |
629 "view-source:http://a.b/", "en", kFormatUrlOmitAll, | 549 "view-source:http://a.b/", kFormatUrlOmitAll, |
630 net::UnescapeRule::NORMAL, L"view-source:a.b", 12}, | 550 net::UnescapeRule::NORMAL, L"view-source:a.b", 12}, |
631 }; | 551 }; |
632 | 552 |
633 for (size_t i = 0; i < arraysize(tests); ++i) { | 553 for (size_t i = 0; i < arraysize(tests); ++i) { |
634 size_t prefix_len; | 554 size_t prefix_len; |
635 base::string16 formatted = FormatUrl( | 555 base::string16 formatted = FormatUrl( |
636 GURL(tests[i].input), tests[i].languages, tests[i].format_types, | 556 GURL(tests[i].input), std::string(), tests[i].format_types, |
637 tests[i].escape_rules, NULL, &prefix_len, NULL); | 557 tests[i].escape_rules, NULL, &prefix_len, NULL); |
638 EXPECT_EQ(WideToUTF16(tests[i].output), formatted) << tests[i].description; | 558 EXPECT_EQ(WideToUTF16(tests[i].output), formatted) << tests[i].description; |
639 EXPECT_EQ(tests[i].prefix_len, prefix_len) << tests[i].description; | 559 EXPECT_EQ(tests[i].prefix_len, prefix_len) << tests[i].description; |
640 } | 560 } |
641 } | 561 } |
642 | 562 |
643 TEST(UrlFormatterTest, FormatUrlParsed) { | 563 TEST(UrlFormatterTest, FormatUrlParsed) { |
644 // No unescape case. | 564 // No unescape case. |
645 url::Parsed parsed; | 565 url::Parsed parsed; |
646 base::string16 formatted = | 566 base::string16 formatted = |
647 FormatUrl(GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/" | 567 FormatUrl(GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/" |
648 "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"), | 568 "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"), |
649 "ja", kFormatUrlOmitNothing, net::UnescapeRule::NONE, &parsed, | 569 std::string(), kFormatUrlOmitNothing, net::UnescapeRule::NONE, |
650 NULL, NULL); | 570 &parsed, NULL, NULL); |
651 EXPECT_EQ(WideToUTF16( | 571 EXPECT_EQ(WideToUTF16( |
652 L"http://%E3%82%B0:%E3%83%BC@\x30B0\x30FC\x30B0\x30EB.jp:8080" | 572 L"http://%E3%82%B0:%E3%83%BC@\x30B0\x30FC\x30B0\x30EB.jp:8080" |
653 L"/%E3%82%B0/?q=%E3%82%B0#\x30B0"), formatted); | 573 L"/%E3%82%B0/?q=%E3%82%B0#\x30B0"), formatted); |
654 EXPECT_EQ(WideToUTF16(L"%E3%82%B0"), | 574 EXPECT_EQ(WideToUTF16(L"%E3%82%B0"), |
655 formatted.substr(parsed.username.begin, parsed.username.len)); | 575 formatted.substr(parsed.username.begin, parsed.username.len)); |
656 EXPECT_EQ(WideToUTF16(L"%E3%83%BC"), | 576 EXPECT_EQ(WideToUTF16(L"%E3%83%BC"), |
657 formatted.substr(parsed.password.begin, parsed.password.len)); | 577 formatted.substr(parsed.password.begin, parsed.password.len)); |
658 EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"), | 578 EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"), |
659 formatted.substr(parsed.host.begin, parsed.host.len)); | 579 formatted.substr(parsed.host.begin, parsed.host.len)); |
660 EXPECT_EQ(WideToUTF16(L"8080"), | 580 EXPECT_EQ(WideToUTF16(L"8080"), |
(...skipping 194 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
855 if (test_char && | 775 if (test_char && |
856 strchr(kUnescapedCharacters, static_cast<char>(test_char))) { | 776 strchr(kUnescapedCharacters, static_cast<char>(test_char))) { |
857 EXPECT_NE(url.spec(), GURL(formatted).spec()); | 777 EXPECT_NE(url.spec(), GURL(formatted).spec()); |
858 } else { | 778 } else { |
859 EXPECT_EQ(url.spec(), GURL(formatted).spec()); | 779 EXPECT_EQ(url.spec(), GURL(formatted).spec()); |
860 } | 780 } |
861 } | 781 } |
862 } | 782 } |
863 | 783 |
864 TEST(UrlFormatterTest, FormatUrlWithOffsets) { | 784 TEST(UrlFormatterTest, FormatUrlWithOffsets) { |
865 CheckAdjustedOffsets(std::string(), "en", kFormatUrlOmitNothing, | 785 CheckAdjustedOffsets(std::string(), kFormatUrlOmitNothing, |
866 net::UnescapeRule::NORMAL, NULL); | 786 net::UnescapeRule::NORMAL, NULL); |
867 | 787 |
868 const size_t basic_offsets[] = { | 788 const size_t basic_offsets[] = { |
869 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, | 789 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, |
870 21, 22, 23, 24, 25 | 790 21, 22, 23, 24, 25 |
871 }; | 791 }; |
872 CheckAdjustedOffsets("http://www.google.com/foo/", "en", | 792 CheckAdjustedOffsets("http://www.google.com/foo/", |
873 kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, | 793 kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, |
874 basic_offsets); | 794 basic_offsets); |
875 | 795 |
876 const size_t omit_auth_offsets_1[] = { | 796 const size_t omit_auth_offsets_1[] = { |
877 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 7, | 797 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 7, |
878 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21 | 798 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21 |
879 }; | 799 }; |
880 CheckAdjustedOffsets("http://foo:bar@www.google.com/", "en", | 800 CheckAdjustedOffsets("http://foo:bar@www.google.com/", |
881 kFormatUrlOmitUsernamePassword, | 801 kFormatUrlOmitUsernamePassword, |
882 net::UnescapeRule::NORMAL, omit_auth_offsets_1); | 802 net::UnescapeRule::NORMAL, omit_auth_offsets_1); |
883 | 803 |
884 const size_t omit_auth_offsets_2[] = { | 804 const size_t omit_auth_offsets_2[] = { |
885 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, 7, 8, 9, 10, 11, 12, 13, 14, | 805 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, 7, 8, 9, 10, 11, 12, 13, 14, |
886 15, 16, 17, 18, 19, 20, 21 | 806 15, 16, 17, 18, 19, 20, 21 |
887 }; | 807 }; |
888 CheckAdjustedOffsets("http://foo@www.google.com/", "en", | 808 CheckAdjustedOffsets("http://foo@www.google.com/", |
889 kFormatUrlOmitUsernamePassword, | 809 kFormatUrlOmitUsernamePassword, |
890 net::UnescapeRule::NORMAL, omit_auth_offsets_2); | 810 net::UnescapeRule::NORMAL, omit_auth_offsets_2); |
891 | 811 |
892 const size_t dont_omit_auth_offsets[] = { | 812 const size_t dont_omit_auth_offsets[] = { |
893 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, | 813 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, |
894 kNpos, kNpos, 11, 12, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, | 814 kNpos, kNpos, 11, 12, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, |
895 kNpos, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, | 815 kNpos, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, |
896 30, 31 | 816 30, 31 |
897 }; | 817 }; |
898 // Unescape to "http://foo\x30B0:\x30B0bar@www.google.com". | 818 // Unescape to "http://foo\x30B0:\x30B0bar@www.google.com". |
899 CheckAdjustedOffsets("http://foo%E3%82%B0:%E3%82%B0bar@www.google.com/", "en", | 819 CheckAdjustedOffsets("http://foo%E3%82%B0:%E3%82%B0bar@www.google.com/", |
900 kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, | 820 kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, |
901 dont_omit_auth_offsets); | 821 dont_omit_auth_offsets); |
902 | 822 |
903 const size_t view_source_offsets[] = { | 823 const size_t view_source_offsets[] = { |
904 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, kNpos, | 824 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, kNpos, |
905 kNpos, kNpos, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33 | 825 kNpos, kNpos, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33 |
906 }; | 826 }; |
907 CheckAdjustedOffsets("view-source:http://foo@www.google.com/", "en", | 827 CheckAdjustedOffsets("view-source:http://foo@www.google.com/", |
908 kFormatUrlOmitUsernamePassword, | 828 kFormatUrlOmitUsernamePassword, |
909 net::UnescapeRule::NORMAL, view_source_offsets); | 829 net::UnescapeRule::NORMAL, view_source_offsets); |
910 | 830 |
911 const size_t idn_hostname_offsets_1[] = { | 831 const size_t idn_hostname_offsets_1[] = { |
912 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, | 832 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, |
913 kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 12, | 833 kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 12, |
914 13, 14, 15, 16, 17, 18, 19 | 834 13, 14, 15, 16, 17, 18, 19 |
915 }; | 835 }; |
916 // Convert punycode to "http://\x671d\x65e5\x3042\x3055\x3072.jp/foo/". | 836 // Convert punycode to "http://\x671d\x65e5\x3042\x3055\x3072.jp/foo/". |
917 CheckAdjustedOffsets("http://xn--l8jvb1ey91xtjb.jp/foo/", "ja", | 837 CheckAdjustedOffsets("http://xn--l8jvb1ey91xtjb.jp/foo/", |
918 kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, | 838 kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, |
919 idn_hostname_offsets_1); | 839 idn_hostname_offsets_1); |
920 | 840 |
921 const size_t idn_hostname_offsets_2[] = { | 841 const size_t idn_hostname_offsets_2[] = { |
922 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, kNpos, kNpos, kNpos, kNpos, kNpos, | 842 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, kNpos, kNpos, kNpos, kNpos, kNpos, |
923 kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 14, 15, kNpos, kNpos, kNpos, | 843 kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 14, 15, kNpos, kNpos, kNpos, |
924 kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, | 844 kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, |
925 kNpos, 19, 20, 21, 22, 23, 24 | 845 kNpos, 19, 20, 21, 22, 23, 24 |
926 }; | 846 }; |
927 // Convert punycode to | 847 // Convert punycode to |
928 // "http://test.\x89c6\x9891.\x5317\x4eac\x5927\x5b78.test/". | 848 // "http://test.\x89c6\x9891.\x5317\x4eac\x5927\x5b78.test/". |
929 CheckAdjustedOffsets("http://test.xn--cy2a840a.xn--1lq90ic7f1rc.test/", | 849 CheckAdjustedOffsets("http://test.xn--cy2a840a.xn--1lq90ic7f1rc.test/", |
930 "zh-CN", kFormatUrlOmitNothing, | 850 kFormatUrlOmitNothing, |
931 net::UnescapeRule::NORMAL, idn_hostname_offsets_2); | 851 net::UnescapeRule::NORMAL, idn_hostname_offsets_2); |
932 | 852 |
933 const size_t unescape_offsets[] = { | 853 const size_t unescape_offsets[] = { |
934 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, | 854 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, |
935 21, 22, 23, 24, 25, kNpos, kNpos, 26, 27, 28, 29, 30, kNpos, kNpos, kNpos, | 855 21, 22, 23, 24, 25, kNpos, kNpos, 26, 27, 28, 29, 30, kNpos, kNpos, kNpos, |
936 kNpos, kNpos, kNpos, kNpos, kNpos, 31, kNpos, kNpos, kNpos, kNpos, kNpos, | 856 kNpos, kNpos, kNpos, kNpos, kNpos, 31, kNpos, kNpos, kNpos, kNpos, kNpos, |
937 kNpos, kNpos, kNpos, 32, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, | 857 kNpos, kNpos, kNpos, 32, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, |
938 kNpos, 33, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos | 858 kNpos, 33, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos |
939 }; | 859 }; |
940 // Unescape to "http://www.google.com/foo bar/\x30B0\x30FC\x30B0\x30EB". | 860 // Unescape to "http://www.google.com/foo bar/\x30B0\x30FC\x30B0\x30EB". |
941 CheckAdjustedOffsets( | 861 CheckAdjustedOffsets( |
942 "http://www.google.com/foo%20bar/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", | 862 "http://www.google.com/foo%20bar/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", |
943 "en", kFormatUrlOmitNothing, net::UnescapeRule::SPACES, unescape_offsets); | 863 kFormatUrlOmitNothing, net::UnescapeRule::SPACES, unescape_offsets); |
944 | 864 |
945 const size_t ref_offsets[] = { | 865 const size_t ref_offsets[] = { |
946 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, | 866 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, |
947 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, kNpos, kNpos, 32, kNpos, kNpos, | 867 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, kNpos, kNpos, 32, kNpos, kNpos, |
948 33 | 868 33 |
949 }; | 869 }; |
950 // Unescape to "http://www.google.com/foo.html#\x30B0\x30B0z". | 870 // Unescape to "http://www.google.com/foo.html#\x30B0\x30B0z". |
951 CheckAdjustedOffsets( | 871 CheckAdjustedOffsets( |
952 "http://www.google.com/foo.html#\xE3\x82\xB0\xE3\x82\xB0z", "en", | 872 "http://www.google.com/foo.html#\xE3\x82\xB0\xE3\x82\xB0z", |
953 kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, ref_offsets); | 873 kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, ref_offsets); |
954 | 874 |
955 const size_t omit_http_offsets[] = { | 875 const size_t omit_http_offsets[] = { |
956 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, | 876 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, |
957 10, 11, 12, 13, 14 | 877 10, 11, 12, 13, 14 |
958 }; | 878 }; |
959 CheckAdjustedOffsets("http://www.google.com/", "en", kFormatUrlOmitHTTP, | 879 CheckAdjustedOffsets("http://www.google.com/", kFormatUrlOmitHTTP, |
960 net::UnescapeRule::NORMAL, omit_http_offsets); | 880 net::UnescapeRule::NORMAL, omit_http_offsets); |
961 | 881 |
962 const size_t omit_http_start_with_ftp_offsets[] = { | 882 const size_t omit_http_start_with_ftp_offsets[] = { |
963 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21 | 883 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21 |
964 }; | 884 }; |
965 CheckAdjustedOffsets("http://ftp.google.com/", "en", kFormatUrlOmitHTTP, | 885 CheckAdjustedOffsets("http://ftp.google.com/", kFormatUrlOmitHTTP, |
966 net::UnescapeRule::NORMAL, | 886 net::UnescapeRule::NORMAL, |
967 omit_http_start_with_ftp_offsets); | 887 omit_http_start_with_ftp_offsets); |
968 | 888 |
969 const size_t omit_all_offsets[] = { | 889 const size_t omit_all_offsets[] = { |
970 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, kNpos, kNpos, kNpos, kNpos, | 890 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, kNpos, kNpos, kNpos, kNpos, |
971 0, 1, 2, 3, 4, 5, 6, 7 | 891 0, 1, 2, 3, 4, 5, 6, 7 |
972 }; | 892 }; |
973 CheckAdjustedOffsets("http://user@foo.com/", "en", kFormatUrlOmitAll, | 893 CheckAdjustedOffsets("http://user@foo.com/", kFormatUrlOmitAll, |
974 net::UnescapeRule::NORMAL, omit_all_offsets); | 894 net::UnescapeRule::NORMAL, omit_all_offsets); |
975 } | 895 } |
976 | 896 |
977 } // namespace | 897 } // namespace |
978 | 898 |
979 } // namespace url_formatter | 899 } // namespace url_formatter |
OLD | NEW |