Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(150)

Side by Side Diff: components/url_formatter/url_formatter_unittest.cc

Issue 1258813002: Implement a new IDN display policy (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: url_canon test: wchar* needs a surrogate pair on *nix Created 4 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2015 The Chromium Authors. All rights reserved. 1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/url_formatter/url_formatter.h" 5 #include "components/url_formatter/url_formatter.h"
6 6
7 #include <stddef.h> 7 #include <stddef.h>
8 #include <string.h> 8 #include <string.h>
9 9
10 #include <vector> 10 #include <vector>
11 11
12 #include "base/macros.h" 12 #include "base/macros.h"
13 #include "base/strings/string_number_conversions.h" 13 #include "base/strings/string_number_conversions.h"
14 #include "base/strings/stringprintf.h" 14 #include "base/strings/stringprintf.h"
15 #include "base/strings/utf_string_conversions.h" 15 #include "base/strings/utf_string_conversions.h"
16 #include "testing/gtest/include/gtest/gtest.h" 16 #include "testing/gtest/include/gtest/gtest.h"
17 #include "url/gurl.h" 17 #include "url/gurl.h"
18 18
19 19
20 namespace url_formatter { 20 namespace url_formatter {
21 21
22 namespace { 22 namespace {
23 23
24 using base::WideToUTF16; 24 using base::WideToUTF16;
25 using base::ASCIIToUTF16; 25 using base::ASCIIToUTF16;
26 26
27 const size_t kNpos = base::string16::npos; 27 const size_t kNpos = base::string16::npos;
28 28
29 const char* const kLanguages[] = {
30 "", "en", "zh-CN", "ja", "ko",
31 "he", "ar", "ru", "el", "fr",
32 "de", "pt", "sv", "th", "hi",
33 "de,en", "el,en", "zh-TW,en", "ko,ja", "he,ru,en",
34 "zh,ru,en"
35 };
36
37 struct IDNTestCase { 29 struct IDNTestCase {
38 const char* const input; 30 const char* const input;
39 const wchar_t* unicode_output; 31 const wchar_t* unicode_output;
40 const bool unicode_allowed[arraysize(kLanguages)]; 32 const bool unicode_allowed;
41 }; 33 };
42 34
43 // TODO(jungshik) This is just a random sample of languages and is far
44 // from exhaustive. We may have to generate all the combinations
45 // of languages (powerset of a set of all the languages).
46 const IDNTestCase idn_cases[] = { 35 const IDNTestCase idn_cases[] = {
47 // No IDN 36 // No IDN
48 {"www.google.com", L"www.google.com", 37 {"www.google.com", L"www.google.com", true},
49 {true, true, true, true, true, 38 {"www.google.com.", L"www.google.com.", true},
50 true, true, true, true, true, 39 {".", L".", true},
51 true, true, true, true, true, 40 {"", L"", true},
52 true, true, true, true, true,
53 true}},
54 {"www.google.com.", L"www.google.com.",
55 {true, true, true, true, true,
56 true, true, true, true, true,
57 true, true, true, true, true,
58 true, true, true, true, true,
59 true}},
60 {".", L".",
61 {true, true, true, true, true,
62 true, true, true, true, true,
63 true, true, true, true, true,
64 true, true, true, true, true,
65 true}},
66 {"", L"",
67 {true, true, true, true, true,
68 true, true, true, true, true,
69 true, true, true, true, true,
70 true, true, true, true, true,
71 true}},
72 // IDN 41 // IDN
73 // Hanzi (Traditional Chinese) 42 // Hanzi (Traditional Chinese)
74 {"xn--1lq90ic7f1rc.cn", L"\x5317\x4eac\x5927\x5b78.cn", 43 {"xn--1lq90ic7f1rc.cn", L"\x5317\x4eac\x5927\x5b78.cn", true},
75 {true, false, true, true, false, 44 // Hanzi ('video' in Simplified Chinese)
76 false, false, false, false, false, 45 {"xn--cy2a840a.com", L"\x89c6\x9891.com", true},
77 false, false, false, false, false,
78 false, false, true, true, false,
79 true}},
80 // Hanzi ('video' in Simplified Chinese : will pass only in zh-CN,zh)
81 {"xn--cy2a840a.com", L"\x89c6\x9891.com",
82 {true, false, true, false, false,
83 false, false, false, false, false,
84 false, false, false, false, false,
85 false, false, false, false, false,
86 true}},
87 // Hanzi + '123' 46 // Hanzi + '123'
88 {"www.xn--123-p18d.com", L"www.\x4e00" L"123.com", 47 {"www.xn--123-p18d.com", L"www.\x4e00" L"123.com", true},
89 {true, false, true, true, false, 48 // Hanzi + Latin : U+56FD is simplified
90 false, false, false, false, false, 49 {"www.xn--hello-9n1hm04c.com", L"www.hello\x4e2d\x56fd.com", true},
91 false, false, false, false, false,
92 false, false, true, true, false,
93 true}},
94 // Hanzi + Latin : U+56FD is simplified and is regarded
95 // as not supported in zh-TW.
96 {"www.xn--hello-9n1hm04c.com", L"www.hello\x4e2d\x56fd.com",
97 {false, false, true, true, false,
98 false, false, false, false, false,
99 false, false, false, false, false,
100 false, false, false, true, false,
101 true}},
102 // Kanji + Kana (Japanese) 50 // Kanji + Kana (Japanese)
103 {"xn--l8jvb1ey91xtjb.jp", L"\x671d\x65e5\x3042\x3055\x3072.jp", 51 {"xn--l8jvb1ey91xtjb.jp", L"\x671d\x65e5\x3042\x3055\x3072.jp", true},
104 {true, false, false, true, false,
105 false, false, false, false, false,
106 false, false, false, false, false,
107 false, false, false, true, false,
108 false}},
109 // Katakana including U+30FC 52 // Katakana including U+30FC
110 {"xn--tckm4i2e.jp", L"\x30b3\x30de\x30fc\x30b9.jp", 53 {"xn--tckm4i2e.jp", L"\x30b3\x30de\x30fc\x30b9.jp", true},
111 {true, false, false, true, false, 54 {"xn--3ck7a7g.jp", L"\u30ce\u30f3\u30bd.jp", true},
112 false, false, false, false, false,
113 false, false, false, false, false,
114 false, false, false, true, false,
115 }},
116 {"xn--3ck7a7g.jp", L"\u30ce\u30f3\u30bd.jp",
117 {true, false, false, true, false,
118 false, false, false, false, false,
119 false, false, false, false, false,
120 false, false, false, true, false,
121 }},
122 // Katakana + Latin (Japanese) 55 // Katakana + Latin (Japanese)
123 // TODO(jungshik): Change 'false' in the first element to 'true' 56 {"xn--e-efusa1mzf.jp", L"e\x30b3\x30de\x30fc\x30b9.jp", true},
124 // after upgrading to ICU 4.2.1 to use new uspoof_* APIs instead 57 {"xn--3bkxe.jp", L"\x30c8\x309a.jp", true},
125 // of our IsIDNComponentInSingleScript().
126 {"xn--e-efusa1mzf.jp", L"e\x30b3\x30de\x30fc\x30b9.jp",
127 {false, false, false, true, false,
128 false, false, false, false, false,
129 false, false, false, false, false,
130 false, false, false, true, false,
131 }},
132 {"xn--3bkxe.jp", L"\x30c8\x309a.jp",
133 {false, false, false, true, false,
134 false, false, false, false, false,
135 false, false, false, false, false,
136 false, false, false, true, false,
137 }},
138 // Hangul (Korean) 58 // Hangul (Korean)
139 {"www.xn--or3b17p6jjc.kr", L"www.\xc804\xc790\xc815\xbd80.kr", 59 {"www.xn--or3b17p6jjc.kr", L"www.\xc804\xc790\xc815\xbd80.kr", true},
140 {true, false, false, false, true,
141 false, false, false, false, false,
142 false, false, false, false, false,
143 false, false, false, true, false,
144 false}},
145 // b<u-umlaut>cher (German) 60 // b<u-umlaut>cher (German)
146 {"xn--bcher-kva.de", L"b\x00fc" L"cher.de", 61 {"xn--bcher-kva.de", L"b\x00fc" L"cher.de", true},
147 {true, false, false, false, false,
148 false, false, false, false, true,
149 true, false, false, false, false,
150 true, false, false, false, false,
151 false}},
152 // a with diaeresis 62 // a with diaeresis
153 {"www.xn--frgbolaget-q5a.se", L"www.f\x00e4rgbolaget.se", 63 {"www.xn--frgbolaget-q5a.se", L"www.f\x00e4rgbolaget.se", true},
154 {true, false, false, false, false,
155 false, false, false, false, false,
156 true, false, true, false, false,
157 true, false, false, false, false,
158 false}},
159 // c-cedilla (French) 64 // c-cedilla (French)
160 {"www.xn--alliancefranaise-npb.fr", L"www.alliancefran\x00e7" L"aise.fr", 65 {"www.xn--alliancefranaise-npb.fr",
161 {true, false, false, false, false, 66 L"www.alliancefran\x00e7" L"aise.fr", true},
162 false, false, false, false, true,
163 false, true, false, false, false,
164 false, false, false, false, false,
165 false}},
166 // caf'e with acute accent' (French) 67 // caf'e with acute accent' (French)
167 {"xn--caf-dma.fr", L"caf\x00e9.fr", 68 {"xn--caf-dma.fr", L"caf\x00e9.fr", true},
168 {true, false, false, false, false,
169 false, false, false, false, true,
170 false, true, true, false, false,
171 false, false, false, false, false,
172 false}},
173 // c-cedilla and a with tilde (Portuguese) 69 // c-cedilla and a with tilde (Portuguese)
174 {"xn--poema-9qae5a.com.br", L"p\x00e3oema\x00e7\x00e3.com.br", 70 {"xn--poema-9qae5a.com.br", L"p\x00e3oema\x00e7\x00e3.com.br", true},
175 {true, false, false, false, false,
176 false, false, false, false, false,
177 false, true, false, false, false,
178 false, false, false, false, false,
179 false}},
180 // s with caron 71 // s with caron
181 {"xn--achy-f6a.com", L"\x0161" L"achy.com", 72 {"xn--achy-f6a.com", L"\x0161" L"achy.com", true},
182 {true, false, false, false, false,
183 false, false, false, false, false,
184 false, false, false, false, false,
185 false, false, false, false, false,
186 false}},
187 // TODO(jungshik) : Add examples with Cyrillic letters
188 // only used in some languages written in Cyrillic.
189 // Eutopia (Greek)
190 {"xn--kxae4bafwg.gr", L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1.gr", 73 {"xn--kxae4bafwg.gr", L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1.gr",
191 {true, false, false, false, false, 74 true},
192 false, false, false, true, false,
193 false, false, false, false, false,
194 false, true, false, false, false,
195 false}},
196 // Eutopia + 123 (Greek) 75 // Eutopia + 123 (Greek)
197 {"xn---123-pldm0haj2bk.gr", 76 {"xn---123-pldm0haj2bk.gr",
198 L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1-123.gr", 77 L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1-123.gr", true},
199 {true, false, false, false, false,
200 false, false, false, true, false,
201 false, false, false, false, false,
202 false, true, false, false, false,
203 false}},
204 // Cyrillic (Russian) 78 // Cyrillic (Russian)
205 {"xn--n1aeec9b.ru", L"\x0442\x043e\x0440\x0442\x044b.ru", 79 {"xn--n1aeec9b.ru", L"\x0442\x043e\x0440\x0442\x044b.ru", true},
206 {true, false, false, false, false,
207 false, false, true, false, false,
208 false, false, false, false, false,
209 false, false, false, false, true,
210 true}},
211 // Cyrillic + 123 (Russian) 80 // Cyrillic + 123 (Russian)
212 {"xn---123-45dmmc5f.ru", L"\x0442\x043e\x0440\x0442\x044b-123.ru", 81 {"xn---123-45dmmc5f.ru", L"\x0442\x043e\x0440\x0442\x044b-123.ru", true},
213 {true, false, false, false, false, 82 // 'president' in Russian. Is a wholescript confusable, but allowed.
214 false, false, true, false, false, 83 {"xn--d1abbgf6aiiy.xn--p1ai",
215 false, false, false, false, false, 84 L"\x043f\x0440\x0435\x0437\x0438\x0434\x0435\x043d\x0442.\x0440\x0444",
216 false, false, false, false, true, 85 true},
217 true}},
218 // Arabic 86 // Arabic
219 {"xn--mgba1fmg.ar", L"\x0627\x0641\x0644\x0627\x0645.ar", 87 {"xn--mgba1fmg.eg", L"\x0627\x0641\x0644\x0627\x0645.eg", true},
220 {true, false, false, false, false,
221 false, true, false, false, false,
222 false, false, false, false, false,
223 false, false, false, false, false,
224 false}},
225 // Hebrew 88 // Hebrew
226 {"xn--4dbib.he", L"\x05d5\x05d0\x05d4.he", 89 {"xn--4dbib.he", L"\x05d5\x05d0\x05d4.he", true},
227 {true, false, false, false, false, 90 // Hebrew + Common
228 true, false, false, false, false, 91 {"xn---123-ptf2c5c6bt.il", L"\x05e2\x05d1\x05e8\x05d9\x05ea-123.il", true},
229 false, false, false, false, false,
230 false, false, false, false, true,
231 false}},
232 // Thai 92 // Thai
233 {"xn--12c2cc4ag3b4ccu.th", 93 {"xn--12c2cc4ag3b4ccu.th",
234 L"\x0e2a\x0e32\x0e22\x0e01\x0e32\x0e23\x0e1a\x0e34\x0e19.th", 94 L"\x0e2a\x0e32\x0e22\x0e01\x0e32\x0e23\x0e1a\x0e34\x0e19.th", true},
235 {true, false, false, false, false, 95 // Thai + Common
236 false, false, false, false, false, 96 {"xn---123-9goxcp8c9db2r.th",
237 false, false, false, true, false, 97 L"\x0e20\x0e32\x0e29\x0e32\x0e44\x0e17\x0e22-123.th", true},
238 false, false, false, false, false,
239 false}},
240 // Devanagari (Hindi) 98 // Devanagari (Hindi)
241 {"www.xn--l1b6a9e1b7c.in", L"www.\x0905\x0915\x094b\x0932\x093e.in", 99 {"www.xn--l1b6a9e1b7c.in", L"www.\x0905\x0915\x094b\x0932\x093e.in", true},
242 {true, false, false, false, false, 100 // Devanagari + Common
243 false, false, false, false, false, 101 {"xn---123-kbjl2j0bl2k.in",
244 false, false, false, false, true, 102 L"\x0939\x093f\x0928\x094d\x0926\x0940-123.in", true},
245 false, false, false, false, false, 103
246 false}}, 104 // 5 Aspirational scripts
247 // Invalid IDN 105 // Unified Canadian Syllabary
248 {"xn--hello?world.com", NULL, 106 {"xn--dfe0tte.ca", L"\x1456\x14c2\x14ef.ca", true},
249 {false, false, false, false, false, 107 // Tifinagh
250 false, false, false, false, false, 108 {"xn--4ljxa2bb4a6bxb.ma",
251 false, false, false, false, false, 109 L"\x2d5c\x2d49\x2d3c\x2d49\x2d4f\x2d30\x2d56.ma", true},
252 false, false, false, false, false, 110 // Tifinagh with a disallowed character(U+2D6F)
253 false}}, 111 {"xn--hmjzaby5d5f.ma", L"\x2d5c\x2d49\x2d3c\x2d6f\x2d49\x2d4f.ma", false},
254 // Unsafe IDNs 112 // Yi
113 {"xn--4o7a6e1x64c.cn", L"\xa188\xa320\xa071\xa0b7.cn", true},
114 // Mongolian - 'ordu' (place, camp)
115 {"xn--56ec8bp.cn", L"\x1823\x1837\x1833\x1824.cn", true},
116 // Mongolian with a disallowed character
117 {"xn--95e5de3ds.cn", L"\x1823\x1837\x1804\x1833\x1824.cn", false},
118 // Miao/Pollard
119 {"xn--2u0fpf0a.cn", L"\U00016f04\U00016f62\U00016f59.cn", true},
120
121 // Script mixing tests
122 // The following script combinations are allowed.
123 // MODERATELY_RESTRICTIVE with Latin limited to ASCII-Latin.
124 // ASCII-Latin + Japn (Kana + Han)
125 // ASCII-Latin + Kore (Hangul + Han)
126 // ASCII-Latin + Han + Bopomofo
127 // ASCII-Latin + any allowed script other than Cyrillic, Greek and Cherokee
255 // "payp<alpha>l.com" 128 // "payp<alpha>l.com"
256 {"www.xn--paypl-g9d.com", L"payp\x03b1l.com", 129 {"www.xn--paypl-g9d.com", L"payp\x03b1l.com", false},
257 {false, false, false, false, false,
258 false, false, false, false, false,
259 false, false, false, false, false,
260 false, false, false, false, false,
261 false}},
262 // google.gr with Greek omicron and epsilon 130 // google.gr with Greek omicron and epsilon
263 {"xn--ggl-6xc1ca.gr", L"g\x03bf\x03bfgl\x03b5.gr", 131 {"xn--ggl-6xc1ca.gr", L"g\x03bf\x03bfgl\x03b5.gr", false},
264 {false, false, false, false, false,
265 false, false, false, false, false,
266 false, false, false, false, false,
267 false, false, false, false, false,
268 false}},
269 // google.ru with Cyrillic o 132 // google.ru with Cyrillic o
270 {"xn--ggl-tdd6ba.ru", L"g\x043e\x043egl\x0435.ru", 133 {"xn--ggl-tdd6ba.ru", L"g\x043e\x043egl\x0435.ru", false},
271 {false, false, false, false, false,
272 false, false, false, false, false,
273 false, false, false, false, false,
274 false, false, false, false, false,
275 false}},
276 // h<e with acute>llo<China in Han>.cn 134 // h<e with acute>llo<China in Han>.cn
277 {"xn--hllo-bpa7979ih5m.cn", L"h\x00e9llo\x4e2d\x56fd.cn", 135 {"xn--hllo-bpa7979ih5m.cn", L"h\x00e9llo\x4e2d\x56fd.cn", false},
278 {false, false, false, false, false,
279 false, false, false, false, false,
280 false, false, false, false, false,
281 false, false, false, false, false,
282 false}},
283 // <Greek rho><Cyrillic a><Cyrillic u>.ru 136 // <Greek rho><Cyrillic a><Cyrillic u>.ru
284 {"xn--2xa6t2b.ru", L"\x03c1\x0430\x0443.ru", 137 {"xn--2xa6t2b.ru", L"\x03c1\x0430\x0443.ru", false},
285 {false, false, false, false, false, 138 // Hangul + Latin
286 false, false, false, false, false, 139 {"xn--han-eb9ll88m.kr", L"\xd55c\xae00han.kr", true},
287 false, false, false, false, false, 140 // Hangul + Latin + Han with IDN ccTLD
288 false, false, false, false, false, 141 {"xn--han-or0kq92gkm3c.xn--3e0b707e",
289 false}}, 142 L"\xd55c\xae00han\x97d3.\xd55c\xad6d", true},
143 // non-ASCII Latin + Hangul
144 {"xn--caf-dma9024xvpg.kr", L"caf\x00e9\xce74\xd398.kr", false},
145 // Hangul + Hiragana
146 {"xn--y9j3b9855e.kr", L"\xd55c\x3072\x3089.kr", false},
147 // <Hiragana>.<Hangul> is allowed because script mixing check is per label.
148 {"xn--y9j3b.xn--3e0b707e", L"\x3072\x3089.\xd55c\xad6d", true},
149 // Traditional Han + Latin
150 {"xn--hanzi-u57ii69i.tw", L"\x6f22\x5b57hanzi.tw", true},
151 // Simplified Han + Latin
152 {"xn--hanzi-u57i952h.cn", L"\x6c49\x5b57hanzi.cn", true},
153 // Simplified Han + Traditional Han
154 {"xn--hanzi-if9kt8n.cn", L"\x6c49\x6f22hanzi.cn", true},
155 // Han + Hiragana + Katakana + Latin
156 {"xn--kanji-ii4dpizfq59yuykqr4b.jp",
157 L"\x632f\x308a\x4eee\x540d\x30ab\x30bfkanji.jp", true},
158 // Han + Bopomofo
159 {"xn--5ekcde0577e87tc.tw", L"\x6ce8\x97f3\x3105\x3106\x3107\x3108.tw", true},
160 // Han + Latin + Bopomofo
161 {"xn--bopo-ty4cghi8509kk7xd.tw",
162 L"\x6ce8\x97f3" L"bopo\x3105\x3106\x3107\x3108.tw", true},
163 // Latin + Bopomofo
164 {"xn--bopomofo-hj5gkalm.tw", L"bopomofo\x3105\x3106\x3107\x3108.tw", true},
165 // Bopomofo + Katakana
166 {"xn--lcka3d1bztghi.tw",
167 L"\x3105\x3106\x3107\x3108\x30ab\x30bf\x30ab\x30ca.tw", false},
168 // Bopomofo + Hangul
169 {"xn--5ekcde4543qbec.tw", L"\x3105\x3106\x3107\x3108\xc8fc\xc74c.tw", false},
170 // Devanagari + Latin
171 {"xn--ab-3ofh8fqbj6h.in", L"ab\x0939\x093f\x0928\x094d\x0926\x0940.in", true},
172 // Thai + Latin
173 {"xn--ab-jsi9al4bxdb6n.th",
174 L"ab\x0e20\x0e32\x0e29\x0e32\x0e44\x0e17\x0e22.th", true},
175 // <vitamin in Katakana>b1.com
176 {"xn--b1-xi4a7cvc9f.com", L"\x30d3\x30bf\x30df\x30f3" L"b1.com", true},
177 // Devanagari + Han
178 {"xn--t2bes3ds6749n.com", L"\x0930\x094b\x0932\x0947\x76e7\x0938.com", false},
179 // Devanagari + Bengali
180 {"xn--11b0x.in", L"\x0915\x0995.in", false},
181
182 // Invisibility check
183 // Thai tone mark malek(U+0E48) repeated
184 {"xn--03c0b3ca.th", L"\x0e23\x0e35\x0e48\x0e48.th", false},
185 // Acute accent repeated
186 {"xn--a-xbba.com", L"a\x0301\x0301.com", false},
187 // 'a' with acute accent + another acute accent
188 {"xn--1ca20i.com", L"\x00e1\x0301.com", false},
189
190 // Mixed script confusable
191 // google with Armenian Small Letter Oh(U+0585)
192 {"xn--gogle-lkg.com", L"g\x0585ogle.com", false},
193 // Hiragana HE(U+3078) mixed with Katakana
194 {"xn--49jxi3as0d0fpc.com",
195 L"\x30e2\x30d2\x30fc\x30c8\x3078\x30d6\x30f3.com", false},
196 // U+30FC + Han
197 {"xn--weka801xo02a.com", L"\x30fc\x52d5\x753b\x30fc.com", false},
198 // Han + U+30FC + Han
199 {"xn--wekz60nb2ay85atj0b.jp", L"\x65e5\x672c\x30fc\x91ce\x7403.jp", false},
200 // Latin + U+30FC + Latin
201 {"xn--abcdef-r64e.jp", L"abc\x30fc" L"def.jp", false},
202 // Latin + U+30FB + Latin
203 {"xn--abcdef-k64e.jp", L"abc\x30fb" L"def.jp", false},
204 // U+30FB + Latin
205 {"xn--abc-os4b.jp", L"\x30fb" L"abc.jp", false},
206
207 // Mixed digits: the first two will also fail mixed script test
208 // Latin + ASCII digit + Deva digit
209 {"xn--asc1deva-j0q.co.in", L"asc1deva\x0967.co.in", false},
210 // Latin + Deva digit + Beng digit
211 {"xn--devabeng-f0qu3f.co.in", L"deva\x0967" L"beng\x09e7.co.in", false},
212 // ASCII digit + Deva digit
213 {"xn--79-v5f.co.in", L"7\x09ea" L"9.co.in", false},
214 // Deva digit + Beng digit
215 {"xn--e4b0x.co.in", L"\x0967\x09e7.co.in", false},
216 // U+4E00 (CJK Ideograph One) is not a digit
217 {"xn--d12-s18d.cn", L"d12\x4e00.cn", true},
290 // One that's really long that will force a buffer realloc 218 // One that's really long that will force a buffer realloc
291 {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 219 {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
292 "aaaaaaa", 220 "aaaaaaa",
293 L"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 221 L"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
294 L"aaaaaaaa", 222 L"aaaaaaaa",
295 {true, true, true, true, true, 223 true},
296 true, true, true, true, true, 224
297 true, true, true, true, true, 225 // Not allowed; characters outside [:Identifier_Status=Allowed:]
298 true, true, true, true, true, 226 // Limited Use Scripts: UTS 31 Table 7.
299 true}}, 227 // Vai
300 // Test cases for characters we blacklisted although allowed in IDN. 228 {"xn--sn8a.com", L"\xa50b.com", false},
301 // Embedded spaces will be turned to %20 in the display. 229 // 'CARD' look-alike in Cherokee
302 // TODO(jungshik): We need to have more cases. This is a typical 230 {"xn--58db0a9q.com", L"\x13df\x13aa\x13a1\x13a0.com", false},
303 // data-driven trap. The following test cases need to be separated 231 // Scripts excluded from Identifiers: UTS 31 Table 4
304 // and tested only for a couple of languages. 232 // Coptic
305 {"xn--osd3820f24c.kr", L"\xac00\xb098\x115f.kr", 233 {"xn--5ya.com", L"\x03e7.com", false},
306 {false, false, false, false, false, 234 // Old Italic
307 false, false, false, false, false, 235 {"xn--097cc.com", L"\U00010300\U00010301.com", false},
308 false, false, false, false, false, 236
309 false, false, false, false, false, 237 // U+115F (Hangul Filler)
310 false}}, 238 {"xn--osd3820f24c.kr", L"\xac00\xb098\x115f.kr", false},
311 {"www.xn--google-ho0coa.com", L"www.\x2039google\x203a.com", 239 {"www.xn--google-ho0coa.com", L"www.\x2039google\x203a.com", false},
312 {false, false, false, false, false, 240 // Latin small capital w
313 false, false, false, false, false, 241 {"xn--hardare-l41c.com", L"hard\x1d21" L"are.com", false},
314 false, false, false, false, false, 242 // Minus Sign(U+2212)
315 false, false, false, false, false, 243 {"xn--t9g238xc2a.jp", L"\x65e5\x2212\x672c.jp", false},
316 }}, 244 // Latin Small Letter Script G
317 {"google.xn--comabc-k8d", L"google.com\x0338" L"abc", 245 {"xn--0naa.com", L"\x0261\x0261.com", false},
318 {false, false, false, false, false, 246 // Hangul Jamo(U+11xx)
319 false, false, false, false, false, 247 {"xn--0pdc3b.com", L"\x1102\x1103\x1110.com", false},
320 false, false, false, false, false, 248 // degree sign
321 false, false, false, false, false, 249 {"xn--36c-tfa.com", L"36\x00b0" L"c.com", false},
322 }}, 250 // Pound sign
323 {"google.xn--com-oh4ba.evil.jp", L"google.com\x309a\x309a.evil.jp", 251 {"xn--5free-9ga.com", L"5free\x00a8.com", false},
324 {false, false, false, false, false, 252 // Hebrew points (U+05B0, U+05B6)
325 false, false, false, false, false, 253 {"xn--7cbl2kc2a.com", L"\x05e1\x05b6\x05e7\x05b0\x05e1.com", false},
326 false, false, false, false, false, 254 // Danda(U+0964)
327 false, false, false, false, false, 255 {"xn--81bp1b6ch8s.com", L"\x0924\x093f\x091c\x0964\x0930\x0940.com", false},
328 }}, 256 // Small letter script G(U+0261)
329 {"google.xn--comevil-v04f.jp", L"google.com\x30ce" L"evil.jp", 257 {"xn--oogle-qmc.com", L"\x0261oogle.com", false},
330 {false, false, false, false, false, 258 // Small Katakana Extension(U+31F1)
331 false, false, false, false, false, 259 {"xn--wlk.com", L"\x31f1.com", false},
332 false, false, false, false, false, 260 // Heart symbol
333 false, false, false, false, false, 261 {"xn--ab-u0x.com", L"ab\x2665.com", false},
334 }}, 262 // Emoji
263 {"xn--vi8hiv.xyz", L"\U0001f355\U0001f4a9.xyz", false},
264 // Registered trade mark
265 {"xn--egistered-fna.com", L"\x00ae" L"egistered.com", false},
266 // Latin Letter Retroflex Click
267 {"xn--registered-25c.com", L"registered\x01c3.com", false},
268 // ASCII '!' not allowed in IDN
269 {"xn--!-257eu42c.kr", L"\xc548\xb155!.kr", false},
270 // 'GOOGLE' in IPA extension
271 {"xn--1naa7pn51hcbaa.com",
272 L"\x0262\x1d0f\x1d0f\x0262\x029f\x1d07.com", false},
335 // Padlock icon spoof. 273 // Padlock icon spoof.
336 {"xn--google-hj64e", L"\U0001f512google.com", 274 {"xn--google-hj64e", L"\U0001f512google.com", false},
337 {false, false, false, false, false, 275
338 false, false, false, false, false, 276 // Custom black list: Combining Long Solidus Overlay
339 false, false, false, false, false, 277 {"google.xn--comabc-k8d", L"google.com\x0338" L"abc", false},
340 false, false, false, false, false, 278
341 }}, 279 // Custom dangerous patterns
342 // Ensure that blacklisting "\xd83d\xdd12" did not inadvertently blacklist 280 // Two Katakana-Hiragana combining mark in a row
343 // all strings with the surrogate '\xdd12'. 281 {"google.xn--com-oh4ba.evil.jp", L"google.com\x309a\x309a.evil.jp", false},
344 {"xn--fk9c.com", L"\U00010912.com", 282 // Katakana Letter No not enclosed by {Han,Hiragana,Katakana}.
345 {true, false, false, false, false, 283 {"google.xn--comevil-v04f.jp", L"google.com\x30ce" L"evil.jp", false},
346 false, false, false, false, false, 284
347 false, false, false, false, false, 285 // 4 Deviation characters between IDNA 2003 and IDNA 2008
348 false, false, false, false, false, 286 // When entered in Unicode, the first two are mapped to 'ss' and Greek sigma
349 }}, 287 // and the latter two are mapped away. However, the punycode form should
350 #if 0 288 // remain in punycode.
351 // These two cases are special. We need a separate test. 289 // U+00DF(sharp-s)
352 // U+3000 and U+3002 are normalized to ASCII space and dot. 290 {"xn--fu-hia.de", L"fu\x00df.de", false},
353 {"xn-- -kq6ay5z.cn", L"\x4e2d\x56fd\x3000.cn", 291 // U+03C2(final-sigma)
354 {false, false, true, false, false, 292 {"xn--mxac2c.gr", L"\x03b1\x03b2\x03c2.gr", false},
355 false, false, false, false, false, 293 // U+200C(ZWNJ)
356 false, false, false, false, false, 294 {"xn--h2by8byc123p.in", L"\x0924\x094d\x200c\x0930\x093f.in", false},
357 false, false, true, false, false, 295 // U+200D(ZWJ)
358 true}}, 296 {"xn--11b6iy14e.in", L"\x0915\x094d\x200d.in", false},
359 {"xn--fiqs8s.cn", L"\x4e2d\x56fd\x3002" L"cn", 297
360 {false, false, true, false, false, 298 // Math Monospace Small A. When entered in Unicode, it's canonicalized to
361 false, false, false, false, false, 299 // 'a'. The punycode form should remain in punycode.
362 false, false, false, false, false, 300 {"xn--bc-9x80a.xyz", L"\U0001d68a" L"bc.xyz", false},
363 false, false, true, false, false, 301 // Math Sans Bold Capital Alpha
364 true}}, 302 {"xn--bc-rg90a.xyz", L"\U0001d756" L"bc.xyz", false},
365 #endif 303 // U+3000 is canonicalized to a space(U+0020), but the punycode form
304 // should remain in punycode.
305 {"xn--p6j412gn7f.cn", L"\x4e2d\x56fd\x3000", false},
306 // U+3002 is canonicalized to ASCII fullstop(U+002E), but the punycode form
307 // should remain in punycode.
308 {"xn--r6j012gn7f.cn", L"\x4e2d\x56fd\x3002", false},
309 // Invalid punycode
310 // Has a codepoint beyond U+10FFFF.
311 {"xn--krank-kg706554a", nullptr, false},
312 // '?' in punycode.
313 {"xn--hello?world.com", nullptr, false},
314
315 // Not allowed in UTS46/IDNA 2008
316 // Georgian Capital Letter(U+10BD)
317 {"xn--1nd.com", L"\x10bd.com", false},
318 // 3rd and 4th characters are '-'.
319 {"xn-----8kci4dhsd", L"\x0440\x0443--\x0430\x0432\x0442\x043e", false},
320 // Leading combining mark
321 {"xn--72b.com", L"\x093e.com", false},
322 // BiDi check per IDNA 2008/UTS 46
323 // Cannot start with AN (Arabic-Indic Number)
324 {"xn--8hbae.eg", L"\x0662\x0660\x0660.eg", false},
325 // Cannot start with an RTL character and end with an LTR character
326 {"xn--x-ymcov.eg", L"\x062c\x0627\x0631x.eg", false},
327 // Can start with an RTL character and end with EN (European Number)
328 {"xn--2-ymcov.eg", L"\x062c\x0627\x0631" L"2.eg", true},
329 // Can start with a RTL and end with AN
330 {"xn--mgbjq0r.eg", L"\x062c\x0627\x0631\x0662.eg", true},
366 }; 331 };
367 332
368 struct AdjustOffsetCase { 333 struct AdjustOffsetCase {
369 size_t input_offset; 334 size_t input_offset;
370 size_t output_offset; 335 size_t output_offset;
371 }; 336 };
372 337
373 struct UrlTestData { 338 struct UrlTestData {
374 const char* const description; 339 const char* const description;
375 const char* const input; 340 const char* const input;
376 const char* const languages;
377 FormatUrlTypes format_types; 341 FormatUrlTypes format_types;
378 net::UnescapeRule::Type escape_rules; 342 net::UnescapeRule::Type escape_rules;
379 const wchar_t* output; // Use |wchar_t| to handle Unicode constants easily. 343 const wchar_t* output; // Use |wchar_t| to handle Unicode constants easily.
380 size_t prefix_len; 344 size_t prefix_len;
381 }; 345 };
382 346
383 // A helper for IDN*{Fast,Slow}.
384 // Append "::<language list>" to |expected| and |actual| to make it
385 // easy to tell which sub-case fails without debugging.
386 void AppendLanguagesToOutputs(const char* languages,
387 base::string16* expected,
388 base::string16* actual) {
389 base::string16 to_append = ASCIIToUTF16("::") + ASCIIToUTF16(languages);
390 expected->append(to_append);
391 actual->append(to_append);
392 }
393
394 // A pair of helpers for the FormatUrlWithOffsets() test. 347 // A pair of helpers for the FormatUrlWithOffsets() test.
395 void VerboseExpect(size_t expected, 348 void VerboseExpect(size_t expected,
396 size_t actual, 349 size_t actual,
397 const std::string& original_url, 350 const std::string& original_url,
398 size_t position, 351 size_t position,
399 const base::string16& formatted_url) { 352 const base::string16& formatted_url) {
400 EXPECT_EQ(expected, actual) << "Original URL: " << original_url 353 EXPECT_EQ(expected, actual) << "Original URL: " << original_url
401 << " (at char " << position << ")\nFormatted URL: " << formatted_url; 354 << " (at char " << position << ")\nFormatted URL: " << formatted_url;
402 } 355 }
403 356
404 void CheckAdjustedOffsets(const std::string& url_string, 357 void CheckAdjustedOffsets(const std::string& url_string,
405 const std::string& languages,
406 FormatUrlTypes format_types, 358 FormatUrlTypes format_types,
407 net::UnescapeRule::Type unescape_rules, 359 net::UnescapeRule::Type unescape_rules,
408 const size_t* output_offsets) { 360 const size_t* output_offsets) {
409 GURL url(url_string); 361 GURL url(url_string);
410 size_t url_length = url_string.length(); 362 size_t url_length = url_string.length();
411 std::vector<size_t> offsets; 363 std::vector<size_t> offsets;
412 for (size_t i = 0; i <= url_length + 1; ++i) 364 for (size_t i = 0; i <= url_length + 1; ++i)
413 offsets.push_back(i); 365 offsets.push_back(i);
414 offsets.push_back(500000); // Something larger than any input length. 366 offsets.push_back(500000); // Something larger than any input length.
415 offsets.push_back(std::string::npos); 367 offsets.push_back(std::string::npos);
416 base::string16 formatted_url = FormatUrlWithOffsets(url, languages, 368 base::string16 formatted_url = FormatUrlWithOffsets(url, std::string(),
417 format_types, unescape_rules, NULL, NULL, &offsets); 369 format_types, unescape_rules, NULL, NULL, &offsets);
418 for (size_t i = 0; i < url_length; ++i) 370 for (size_t i = 0; i < url_length; ++i)
419 VerboseExpect(output_offsets[i], offsets[i], url_string, i, formatted_url); 371 VerboseExpect(output_offsets[i], offsets[i], url_string, i, formatted_url);
420 VerboseExpect(formatted_url.length(), offsets[url_length], url_string, 372 VerboseExpect(formatted_url.length(), offsets[url_length], url_string,
421 url_length, formatted_url); 373 url_length, formatted_url);
422 VerboseExpect(base::string16::npos, offsets[url_length + 1], url_string, 374 VerboseExpect(base::string16::npos, offsets[url_length + 1], url_string,
423 500000, formatted_url); 375 500000, formatted_url);
424 VerboseExpect(base::string16::npos, offsets[url_length + 2], url_string, 376 VerboseExpect(base::string16::npos, offsets[url_length + 2], url_string,
425 std::string::npos, formatted_url); 377 std::string::npos, formatted_url);
426 } 378 }
427 379
428 TEST(UrlFormatterTest, IDNToUnicodeFast) { 380 TEST(UrlFormatterTest, IDNToUnicode) {
429 for (size_t i = 0; i < arraysize(idn_cases); i++) { 381 for (size_t i = 0; i < arraysize(idn_cases); i++) {
430 for (size_t j = 0; j < arraysize(kLanguages); j++) { 382 base::string16 output(IDNToUnicode(idn_cases[i].input, std::string()));
431 // ja || zh-TW,en || ko,ja -> IDNToUnicodeSlow 383 base::string16 expected(idn_cases[i].unicode_allowed
432 if (j == 3 || j == 17 || j == 18) 384 ? WideToUTF16(idn_cases[i].unicode_output)
433 continue; 385 : ASCIIToUTF16(idn_cases[i].input));
434 base::string16 output(IDNToUnicode(idn_cases[i].input, kLanguages[j])); 386 EXPECT_EQ(expected, output) << "input # " << i << ": \""
435 base::string16 expected(idn_cases[i].unicode_allowed[j] ? 387 << idn_cases[i].input << "\"";
436 WideToUTF16(idn_cases[i].unicode_output) :
437 ASCIIToUTF16(idn_cases[i].input));
438 AppendLanguagesToOutputs(kLanguages[j], &expected, &output);
439 EXPECT_EQ(expected, output) << "input: \"" << idn_cases[i].input
440 << "\", languages: \"" << kLanguages[j]
441 << "\"";
442 }
443 }
444 }
445
446 TEST(UrlFormatterTest, IDNToUnicodeSlow) {
447 for (size_t i = 0; i < arraysize(idn_cases); i++) {
448 for (size_t j = 0; j < arraysize(kLanguages); j++) {
449 // !(ja || zh-TW,en || ko,ja) -> IDNToUnicodeFast
450 if (!(j == 3 || j == 17 || j == 18))
451 continue;
452 base::string16 output(IDNToUnicode(idn_cases[i].input, kLanguages[j]));
453 base::string16 expected(idn_cases[i].unicode_allowed[j] ?
454 WideToUTF16(idn_cases[i].unicode_output) :
455 ASCIIToUTF16(idn_cases[i].input));
456 AppendLanguagesToOutputs(kLanguages[j], &expected, &output);
457 EXPECT_EQ(expected, output) << "input: \"" << idn_cases[i].input
458 << "\", languages: \"" << kLanguages[j]
459 << "\"";
460 }
461 }
462 }
463
464 // ulocdata_getExemplarSet may fail with some locales (currently bn, gu, and
465 // te), which was causing a crash (See http://crbug.com/510551). This may be an
466 // icu bug, but regardless, that should not cause a crash.
467 TEST(UrlFormatterTest, IDNToUnicodeNeverCrashes) {
468 for (char c1 = 'a'; c1 <= 'z'; c1++) {
469 for (char c2 = 'a'; c2 <= 'z'; c2++) {
470 std::string lang = base::StringPrintf("%c%c", c1, c2);
471 base::string16 output(IDNToUnicode("xn--74h", lang));
472 }
473 } 388 }
474 } 389 }
475 390
476 TEST(UrlFormatterTest, FormatUrl) { 391 TEST(UrlFormatterTest, FormatUrl) {
477 FormatUrlTypes default_format_type = kFormatUrlOmitUsernamePassword; 392 FormatUrlTypes default_format_type = kFormatUrlOmitUsernamePassword;
478 const UrlTestData tests[] = { 393 const UrlTestData tests[] = {
479 {"Empty URL", "", "", default_format_type, net::UnescapeRule::NORMAL, L"", 394 {"Empty URL", "", default_format_type, net::UnescapeRule::NORMAL, L"",
480 0}, 395 0},
481 396
482 {"Simple URL", "http://www.google.com/", "", default_format_type, 397 {"Simple URL", "http://www.google.com/", default_format_type,
483 net::UnescapeRule::NORMAL, L"http://www.google.com/", 7}, 398 net::UnescapeRule::NORMAL, L"http://www.google.com/", 7},
484 399
485 {"With a port number and a reference", 400 {"With a port number and a reference",
486 "http://www.google.com:8080/#\xE3\x82\xB0", "", default_format_type, 401 "http://www.google.com:8080/#\xE3\x82\xB0", default_format_type,
487 net::UnescapeRule::NORMAL, L"http://www.google.com:8080/#\x30B0", 7}, 402 net::UnescapeRule::NORMAL, L"http://www.google.com:8080/#\x30B0", 7},
488 403
489 // -------- IDN tests -------- 404 // -------- IDN tests --------
490 {"Japanese IDN with ja", "http://xn--l8jvb1ey91xtjb.jp", "ja", 405 {"Japanese IDN with ja", "http://xn--l8jvb1ey91xtjb.jp",
491 default_format_type, net::UnescapeRule::NORMAL, 406 default_format_type, net::UnescapeRule::NORMAL,
492 L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7}, 407 L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7},
493 408
494 {"Japanese IDN with en", "http://xn--l8jvb1ey91xtjb.jp", "en", 409 {"mailto: with Japanese IDN", "mailto:foo@xn--l8jvb1ey91xtjb.jp",
495 default_format_type, net::UnescapeRule::NORMAL,
496 L"http://xn--l8jvb1ey91xtjb.jp/", 7},
497
498 {"Japanese IDN without any languages", "http://xn--l8jvb1ey91xtjb.jp", "",
499 default_format_type, net::UnescapeRule::NORMAL,
500 // Single script is safe for empty languages.
501 L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7},
502
503 {"mailto: with Japanese IDN", "mailto:foo@xn--l8jvb1ey91xtjb.jp", "ja",
504 default_format_type, net::UnescapeRule::NORMAL, 410 default_format_type, net::UnescapeRule::NORMAL,
505 // GURL doesn't assume an email address's domain part as a host name. 411 // GURL doesn't assume an email address's domain part as a host name.
506 L"mailto:foo@xn--l8jvb1ey91xtjb.jp", 7}, 412 L"mailto:foo@xn--l8jvb1ey91xtjb.jp", 7},
507 413
508 {"file: with Japanese IDN", "file://xn--l8jvb1ey91xtjb.jp/config.sys", 414 {"file: with Japanese IDN", "file://xn--l8jvb1ey91xtjb.jp/config.sys",
509 "ja", default_format_type, net::UnescapeRule::NORMAL, 415 default_format_type, net::UnescapeRule::NORMAL,
510 L"file://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 7}, 416 L"file://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 7},
511 417
512 {"ftp: with Japanese IDN", "ftp://xn--l8jvb1ey91xtjb.jp/config.sys", "ja", 418 {"ftp: with Japanese IDN", "ftp://xn--l8jvb1ey91xtjb.jp/config.sys",
513 default_format_type, net::UnescapeRule::NORMAL, 419 default_format_type, net::UnescapeRule::NORMAL,
514 L"ftp://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 6}, 420 L"ftp://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 6},
515 421
516 // -------- omit_username_password flag tests -------- 422 // -------- omit_username_password flag tests --------
517 {"With username and password, omit_username_password=false", 423 {"With username and password, omit_username_password=false",
518 "http://user:passwd@example.com/foo", "", kFormatUrlOmitNothing, 424 "http://user:passwd@example.com/foo", kFormatUrlOmitNothing,
519 net::UnescapeRule::NORMAL, L"http://user:passwd@example.com/foo", 19}, 425 net::UnescapeRule::NORMAL, L"http://user:passwd@example.com/foo", 19},
520 426
521 {"With username and password, omit_username_password=true", 427 {"With username and password, omit_username_password=true",
522 "http://user:passwd@example.com/foo", "", default_format_type, 428 "http://user:passwd@example.com/foo", default_format_type,
523 net::UnescapeRule::NORMAL, L"http://example.com/foo", 7}, 429 net::UnescapeRule::NORMAL, L"http://example.com/foo", 7},
524 430
525 {"With username and no password", "http://user@example.com/foo", "", 431 {"With username and no password", "http://user@example.com/foo",
526 default_format_type, net::UnescapeRule::NORMAL, 432 default_format_type, net::UnescapeRule::NORMAL,
527 L"http://example.com/foo", 7}, 433 L"http://example.com/foo", 7},
528 434
529 {"Just '@' without username and password", "http://@example.com/foo", "", 435 {"Just '@' without username and password", "http://@example.com/foo",
530 default_format_type, net::UnescapeRule::NORMAL, 436 default_format_type, net::UnescapeRule::NORMAL,
531 L"http://example.com/foo", 7}, 437 L"http://example.com/foo", 7},
532 438
533 // GURL doesn't think local-part of an email address is username for URL. 439 // GURL doesn't think local-part of an email address is username for URL.
534 {"mailto:, omit_username_password=true", "mailto:foo@example.com", "", 440 {"mailto:, omit_username_password=true", "mailto:foo@example.com",
535 default_format_type, net::UnescapeRule::NORMAL, 441 default_format_type, net::UnescapeRule::NORMAL,
536 L"mailto:foo@example.com", 7}, 442 L"mailto:foo@example.com", 7},
537 443
538 // -------- unescape flag tests -------- 444 // -------- unescape flag tests --------
539 {"Do not unescape", 445 {"Do not unescape",
540 "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/" 446 "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/"
541 "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB" 447 "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"
542 "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", 448 "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB",
543 "en", default_format_type, net::UnescapeRule::NONE, 449 default_format_type, net::UnescapeRule::NONE,
544 // GURL parses %-encoded hostnames into Punycode. 450 // GURL parses %-encoded hostnames into Punycode.
545 L"http://xn--qcka1pmc.jp/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB" 451 L"http://\x30B0\x30FC\x30B0\x30EB.jp/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB "
546 L"?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", 452 L"?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB",
547 7}, 453 7},
548 454
549 {"Unescape normally", 455 {"Unescape normally",
550 "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/" 456 "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/"
551 "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB" 457 "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"
552 "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", 458 "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB",
553 "en", default_format_type, net::UnescapeRule::NORMAL, 459 default_format_type, net::UnescapeRule::NORMAL,
554 L"http://xn--qcka1pmc.jp/\x30B0\x30FC\x30B0\x30EB" 460 L"http://\x30B0\x30FC\x30B0\x30EB.jp/\x30B0\x30FC\x30B0\x30EB"
555 L"?q=\x30B0\x30FC\x30B0\x30EB", 461 L"?q=\x30B0\x30FC\x30B0\x30EB",
556 7}, 462 7},
557 463
558 {"Unescape normally with BiDi control character", 464 {"Unescape normally with BiDi control character",
559 "http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy", "en", 465 "http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy",
560 default_format_type, net::UnescapeRule::NORMAL, 466 default_format_type, net::UnescapeRule::NORMAL,
561 L"http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy", 7}, 467 L"http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy", 7},
562 468
563 {"Unescape normally including unescape spaces", 469 {"Unescape normally including unescape spaces",
564 "http://www.google.com/search?q=Hello%20World", "en", 470 "http://www.google.com/search?q=Hello%20World",
565 default_format_type, net::UnescapeRule::SPACES, 471 default_format_type, net::UnescapeRule::SPACES,
566 L"http://www.google.com/search?q=Hello World", 7}, 472 L"http://www.google.com/search?q=Hello World", 7},
567 473
568 /* 474 /*
569 {"unescape=true with some special characters", 475 {"unescape=true with some special characters",
570 "http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", "", 476 "http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z",
571 kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, 477 kFormatUrlOmitNothing, net::UnescapeRule::NORMAL,
572 L"http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", 25}, 478 L"http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", 25},
573 */ 479 */
574 // Disabled: the resultant URL becomes "...user%253A:%2540passwd...". 480 // Disabled: the resultant URL becomes "...user%253A:%2540passwd...".
575 481
576 // -------- omit http: -------- 482 // -------- omit http: --------
577 {"omit http with user name", "http://user@example.com/foo", "", 483 {"omit http with user name", "http://user@example.com/foo",
578 kFormatUrlOmitAll, net::UnescapeRule::NORMAL, L"example.com/foo", 0}, 484 kFormatUrlOmitAll, net::UnescapeRule::NORMAL, L"example.com/foo", 0},
579 485
580 {"omit http", "http://www.google.com/", "en", kFormatUrlOmitHTTP, 486 {"omit http", "http://www.google.com/", kFormatUrlOmitHTTP,
581 net::UnescapeRule::NORMAL, L"www.google.com/", 0}, 487 net::UnescapeRule::NORMAL, L"www.google.com/", 0},
582 488
583 {"omit http with https", "https://www.google.com/", "en", 489 {"omit http with https", "https://www.google.com/",
584 kFormatUrlOmitHTTP, net::UnescapeRule::NORMAL, 490 kFormatUrlOmitHTTP, net::UnescapeRule::NORMAL,
585 L"https://www.google.com/", 8}, 491 L"https://www.google.com/", 8},
586 492
587 {"omit http starts with ftp.", "http://ftp.google.com/", "en", 493 {"omit http starts with ftp.", "http://ftp.google.com/",
588 kFormatUrlOmitHTTP, net::UnescapeRule::NORMAL, L"http://ftp.google.com/", 494 kFormatUrlOmitHTTP, net::UnescapeRule::NORMAL, L"http://ftp.google.com/",
589 7}, 495 7},
590 496
591 // -------- omit trailing slash on bare hostname -------- 497 // -------- omit trailing slash on bare hostname --------
592 {"omit slash when it's the entire path", "http://www.google.com/", "en", 498 {"omit slash when it's the entire path", "http://www.google.com/",
593 kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL, 499 kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL,
594 L"http://www.google.com", 7}, 500 L"http://www.google.com", 7},
595 {"omit slash when there's a ref", "http://www.google.com/#ref", "en", 501 {"omit slash when there's a ref", "http://www.google.com/#ref",
596 kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL, 502 kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL,
597 L"http://www.google.com/#ref", 7}, 503 L"http://www.google.com/#ref", 7},
598 {"omit slash when there's a query", "http://www.google.com/?", "en", 504 {"omit slash when there's a query", "http://www.google.com/?",
599 kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL, 505 kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL,
600 L"http://www.google.com/?", 7}, 506 L"http://www.google.com/?", 7},
601 {"omit slash when it's not the entire path", "http://www.google.com/foo", 507 {"omit slash when it's not the entire path", "http://www.google.com/foo",
602 "en", kFormatUrlOmitTrailingSlashOnBareHostname, 508 kFormatUrlOmitTrailingSlashOnBareHostname,
603 net::UnescapeRule::NORMAL, L"http://www.google.com/foo", 7}, 509 net::UnescapeRule::NORMAL, L"http://www.google.com/foo", 7},
604 {"omit slash for nonstandard URLs", "data:/", "en", 510 {"omit slash for nonstandard URLs", "data:/",
605 kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL, 511 kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL,
606 L"data:/", 5}, 512 L"data:/", 5},
607 {"omit slash for file URLs", "file:///", "en", 513 {"omit slash for file URLs", "file:///",
608 kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL, 514 kFormatUrlOmitTrailingSlashOnBareHostname, net::UnescapeRule::NORMAL,
609 L"file:///", 7}, 515 L"file:///", 7},
610 516
611 // -------- view-source: -------- 517 // -------- view-source: --------
612 {"view-source", "view-source:http://xn--qcka1pmc.jp/", "ja", 518 {"view-source", "view-source:http://xn--qcka1pmc.jp/",
613 default_format_type, net::UnescapeRule::NORMAL, 519 default_format_type, net::UnescapeRule::NORMAL,
614 L"view-source:http://\x30B0\x30FC\x30B0\x30EB.jp/", 19}, 520 L"view-source:http://\x30B0\x30FC\x30B0\x30EB.jp/", 19},
615 521
616 {"view-source of view-source", 522 {"view-source of view-source",
617 "view-source:view-source:http://xn--qcka1pmc.jp/", "ja", 523 "view-source:view-source:http://xn--qcka1pmc.jp/",
618 default_format_type, net::UnescapeRule::NORMAL, 524 default_format_type, net::UnescapeRule::NORMAL,
619 L"view-source:view-source:http://xn--qcka1pmc.jp/", 12}, 525 L"view-source:view-source:http://xn--qcka1pmc.jp/", 12},
620 526
621 // view-source should omit http and trailing slash where non-view-source 527 // view-source should omit http and trailing slash where non-view-source
622 // would. 528 // would.
623 {"view-source omit http", "view-source:http://a.b/c", "en", 529 {"view-source omit http", "view-source:http://a.b/c",
624 kFormatUrlOmitAll, net::UnescapeRule::NORMAL, L"view-source:a.b/c", 12}, 530 kFormatUrlOmitAll, net::UnescapeRule::NORMAL, L"view-source:a.b/c", 12},
625 {"view-source omit http starts with ftp.", "view-source:http://ftp.b/c", 531 {"view-source omit http starts with ftp.", "view-source:http://ftp.b/c",
626 "en", kFormatUrlOmitAll, net::UnescapeRule::NORMAL, 532 kFormatUrlOmitAll, net::UnescapeRule::NORMAL,
627 L"view-source:http://ftp.b/c", 19}, 533 L"view-source:http://ftp.b/c", 19},
628 {"view-source omit slash when it's the entire path", 534 {"view-source omit slash when it's the entire path",
629 "view-source:http://a.b/", "en", kFormatUrlOmitAll, 535 "view-source:http://a.b/", kFormatUrlOmitAll,
630 net::UnescapeRule::NORMAL, L"view-source:a.b", 12}, 536 net::UnescapeRule::NORMAL, L"view-source:a.b", 12},
631 }; 537 };
632 538
633 for (size_t i = 0; i < arraysize(tests); ++i) { 539 for (size_t i = 0; i < arraysize(tests); ++i) {
634 size_t prefix_len; 540 size_t prefix_len;
635 base::string16 formatted = FormatUrl( 541 base::string16 formatted = FormatUrl(
636 GURL(tests[i].input), tests[i].languages, tests[i].format_types, 542 GURL(tests[i].input), std::string(), tests[i].format_types,
637 tests[i].escape_rules, NULL, &prefix_len, NULL); 543 tests[i].escape_rules, NULL, &prefix_len, NULL);
638 EXPECT_EQ(WideToUTF16(tests[i].output), formatted) << tests[i].description; 544 EXPECT_EQ(WideToUTF16(tests[i].output), formatted) << tests[i].description;
639 EXPECT_EQ(tests[i].prefix_len, prefix_len) << tests[i].description; 545 EXPECT_EQ(tests[i].prefix_len, prefix_len) << tests[i].description;
640 } 546 }
641 } 547 }
642 548
643 TEST(UrlFormatterTest, FormatUrlParsed) { 549 TEST(UrlFormatterTest, FormatUrlParsed) {
644 // No unescape case. 550 // No unescape case.
645 url::Parsed parsed; 551 url::Parsed parsed;
646 base::string16 formatted = 552 base::string16 formatted =
647 FormatUrl(GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/" 553 FormatUrl(GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/"
648 "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"), 554 "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"),
649 "ja", kFormatUrlOmitNothing, net::UnescapeRule::NONE, &parsed, 555 std::string(), kFormatUrlOmitNothing, net::UnescapeRule::NONE,
650 NULL, NULL); 556 &parsed, NULL, NULL);
651 EXPECT_EQ(WideToUTF16( 557 EXPECT_EQ(WideToUTF16(
652 L"http://%E3%82%B0:%E3%83%BC@\x30B0\x30FC\x30B0\x30EB.jp:8080" 558 L"http://%E3%82%B0:%E3%83%BC@\x30B0\x30FC\x30B0\x30EB.jp:8080"
653 L"/%E3%82%B0/?q=%E3%82%B0#\x30B0"), formatted); 559 L"/%E3%82%B0/?q=%E3%82%B0#\x30B0"), formatted);
654 EXPECT_EQ(WideToUTF16(L"%E3%82%B0"), 560 EXPECT_EQ(WideToUTF16(L"%E3%82%B0"),
655 formatted.substr(parsed.username.begin, parsed.username.len)); 561 formatted.substr(parsed.username.begin, parsed.username.len));
656 EXPECT_EQ(WideToUTF16(L"%E3%83%BC"), 562 EXPECT_EQ(WideToUTF16(L"%E3%83%BC"),
657 formatted.substr(parsed.password.begin, parsed.password.len)); 563 formatted.substr(parsed.password.begin, parsed.password.len));
658 EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"), 564 EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"),
659 formatted.substr(parsed.host.begin, parsed.host.len)); 565 formatted.substr(parsed.host.begin, parsed.host.len));
660 EXPECT_EQ(WideToUTF16(L"8080"), 566 EXPECT_EQ(WideToUTF16(L"8080"),
(...skipping 194 matching lines...) Expand 10 before | Expand all | Expand 10 after
855 if (test_char && 761 if (test_char &&
856 strchr(kUnescapedCharacters, static_cast<char>(test_char))) { 762 strchr(kUnescapedCharacters, static_cast<char>(test_char))) {
857 EXPECT_NE(url.spec(), GURL(formatted).spec()); 763 EXPECT_NE(url.spec(), GURL(formatted).spec());
858 } else { 764 } else {
859 EXPECT_EQ(url.spec(), GURL(formatted).spec()); 765 EXPECT_EQ(url.spec(), GURL(formatted).spec());
860 } 766 }
861 } 767 }
862 } 768 }
863 769
864 TEST(UrlFormatterTest, FormatUrlWithOffsets) { 770 TEST(UrlFormatterTest, FormatUrlWithOffsets) {
865 CheckAdjustedOffsets(std::string(), "en", kFormatUrlOmitNothing, 771 CheckAdjustedOffsets(std::string(), kFormatUrlOmitNothing,
866 net::UnescapeRule::NORMAL, NULL); 772 net::UnescapeRule::NORMAL, NULL);
867 773
868 const size_t basic_offsets[] = { 774 const size_t basic_offsets[] = {
869 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 775 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
870 21, 22, 23, 24, 25 776 21, 22, 23, 24, 25
871 }; 777 };
872 CheckAdjustedOffsets("http://www.google.com/foo/", "en", 778 CheckAdjustedOffsets("http://www.google.com/foo/",
873 kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, 779 kFormatUrlOmitNothing, net::UnescapeRule::NORMAL,
874 basic_offsets); 780 basic_offsets);
875 781
876 const size_t omit_auth_offsets_1[] = { 782 const size_t omit_auth_offsets_1[] = {
877 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 7, 783 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 7,
878 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21 784 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
879 }; 785 };
880 CheckAdjustedOffsets("http://foo:bar@www.google.com/", "en", 786 CheckAdjustedOffsets("http://foo:bar@www.google.com/",
881 kFormatUrlOmitUsernamePassword, 787 kFormatUrlOmitUsernamePassword,
882 net::UnescapeRule::NORMAL, omit_auth_offsets_1); 788 net::UnescapeRule::NORMAL, omit_auth_offsets_1);
883 789
884 const size_t omit_auth_offsets_2[] = { 790 const size_t omit_auth_offsets_2[] = {
885 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, 7, 8, 9, 10, 11, 12, 13, 14, 791 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, 7, 8, 9, 10, 11, 12, 13, 14,
886 15, 16, 17, 18, 19, 20, 21 792 15, 16, 17, 18, 19, 20, 21
887 }; 793 };
888 CheckAdjustedOffsets("http://foo@www.google.com/", "en", 794 CheckAdjustedOffsets("http://foo@www.google.com/",
889 kFormatUrlOmitUsernamePassword, 795 kFormatUrlOmitUsernamePassword,
890 net::UnescapeRule::NORMAL, omit_auth_offsets_2); 796 net::UnescapeRule::NORMAL, omit_auth_offsets_2);
891 797
892 const size_t dont_omit_auth_offsets[] = { 798 const size_t dont_omit_auth_offsets[] = {
893 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 799 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
894 kNpos, kNpos, 11, 12, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 800 kNpos, kNpos, 11, 12, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
895 kNpos, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 801 kNpos, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
896 30, 31 802 30, 31
897 }; 803 };
898 // Unescape to "http://foo\x30B0:\x30B0bar@www.google.com". 804 // Unescape to "http://foo\x30B0:\x30B0bar@www.google.com".
899 CheckAdjustedOffsets("http://foo%E3%82%B0:%E3%82%B0bar@www.google.com/", "en", 805 CheckAdjustedOffsets("http://foo%E3%82%B0:%E3%82%B0bar@www.google.com/",
900 kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, 806 kFormatUrlOmitNothing, net::UnescapeRule::NORMAL,
901 dont_omit_auth_offsets); 807 dont_omit_auth_offsets);
902 808
903 const size_t view_source_offsets[] = { 809 const size_t view_source_offsets[] = {
904 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, kNpos, 810 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, kNpos,
905 kNpos, kNpos, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33 811 kNpos, kNpos, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33
906 }; 812 };
907 CheckAdjustedOffsets("view-source:http://foo@www.google.com/", "en", 813 CheckAdjustedOffsets("view-source:http://foo@www.google.com/",
908 kFormatUrlOmitUsernamePassword, 814 kFormatUrlOmitUsernamePassword,
909 net::UnescapeRule::NORMAL, view_source_offsets); 815 net::UnescapeRule::NORMAL, view_source_offsets);
910 816
911 const size_t idn_hostname_offsets_1[] = { 817 const size_t idn_hostname_offsets_1[] = {
912 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 818 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
913 kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 12, 819 kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 12,
914 13, 14, 15, 16, 17, 18, 19 820 13, 14, 15, 16, 17, 18, 19
915 }; 821 };
916 // Convert punycode to "http://\x671d\x65e5\x3042\x3055\x3072.jp/foo/". 822 // Convert punycode to "http://\x671d\x65e5\x3042\x3055\x3072.jp/foo/".
917 CheckAdjustedOffsets("http://xn--l8jvb1ey91xtjb.jp/foo/", "ja", 823 CheckAdjustedOffsets("http://xn--l8jvb1ey91xtjb.jp/foo/",
918 kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, 824 kFormatUrlOmitNothing, net::UnescapeRule::NORMAL,
919 idn_hostname_offsets_1); 825 idn_hostname_offsets_1);
920 826
921 const size_t idn_hostname_offsets_2[] = { 827 const size_t idn_hostname_offsets_2[] = {
922 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, kNpos, kNpos, kNpos, kNpos, kNpos, 828 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, kNpos, kNpos, kNpos, kNpos, kNpos,
923 kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 14, 15, kNpos, kNpos, kNpos, 829 kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 14, 15, kNpos, kNpos, kNpos,
924 kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 830 kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
925 kNpos, 19, 20, 21, 22, 23, 24 831 kNpos, 19, 20, 21, 22, 23, 24
926 }; 832 };
927 // Convert punycode to 833 // Convert punycode to
928 // "http://test.\x89c6\x9891.\x5317\x4eac\x5927\x5b78.test/". 834 // "http://test.\x89c6\x9891.\x5317\x4eac\x5927\x5b78.test/".
929 CheckAdjustedOffsets("http://test.xn--cy2a840a.xn--1lq90ic7f1rc.test/", 835 CheckAdjustedOffsets("http://test.xn--cy2a840a.xn--1lq90ic7f1rc.test/",
930 "zh-CN", kFormatUrlOmitNothing, 836 kFormatUrlOmitNothing,
931 net::UnescapeRule::NORMAL, idn_hostname_offsets_2); 837 net::UnescapeRule::NORMAL, idn_hostname_offsets_2);
932 838
933 const size_t unescape_offsets[] = { 839 const size_t unescape_offsets[] = {
934 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 840 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
935 21, 22, 23, 24, 25, kNpos, kNpos, 26, 27, 28, 29, 30, kNpos, kNpos, kNpos, 841 21, 22, 23, 24, 25, kNpos, kNpos, 26, 27, 28, 29, 30, kNpos, kNpos, kNpos,
936 kNpos, kNpos, kNpos, kNpos, kNpos, 31, kNpos, kNpos, kNpos, kNpos, kNpos, 842 kNpos, kNpos, kNpos, kNpos, kNpos, 31, kNpos, kNpos, kNpos, kNpos, kNpos,
937 kNpos, kNpos, kNpos, 32, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 843 kNpos, kNpos, kNpos, 32, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
938 kNpos, 33, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos 844 kNpos, 33, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos
939 }; 845 };
940 // Unescape to "http://www.google.com/foo bar/\x30B0\x30FC\x30B0\x30EB". 846 // Unescape to "http://www.google.com/foo bar/\x30B0\x30FC\x30B0\x30EB".
941 CheckAdjustedOffsets( 847 CheckAdjustedOffsets(
942 "http://www.google.com/foo%20bar/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", 848 "http://www.google.com/foo%20bar/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB",
943 "en", kFormatUrlOmitNothing, net::UnescapeRule::SPACES, unescape_offsets); 849 kFormatUrlOmitNothing, net::UnescapeRule::SPACES, unescape_offsets);
944 850
945 const size_t ref_offsets[] = { 851 const size_t ref_offsets[] = {
946 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 852 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
947 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, kNpos, kNpos, 32, kNpos, kNpos, 853 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, kNpos, kNpos, 32, kNpos, kNpos,
948 33 854 33
949 }; 855 };
950 // Unescape to "http://www.google.com/foo.html#\x30B0\x30B0z". 856 // Unescape to "http://www.google.com/foo.html#\x30B0\x30B0z".
951 CheckAdjustedOffsets( 857 CheckAdjustedOffsets(
952 "http://www.google.com/foo.html#\xE3\x82\xB0\xE3\x82\xB0z", "en", 858 "http://www.google.com/foo.html#\xE3\x82\xB0\xE3\x82\xB0z",
953 kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, ref_offsets); 859 kFormatUrlOmitNothing, net::UnescapeRule::NORMAL, ref_offsets);
954 860
955 const size_t omit_http_offsets[] = { 861 const size_t omit_http_offsets[] = {
956 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 862 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
957 10, 11, 12, 13, 14 863 10, 11, 12, 13, 14
958 }; 864 };
959 CheckAdjustedOffsets("http://www.google.com/", "en", kFormatUrlOmitHTTP, 865 CheckAdjustedOffsets("http://www.google.com/", kFormatUrlOmitHTTP,
960 net::UnescapeRule::NORMAL, omit_http_offsets); 866 net::UnescapeRule::NORMAL, omit_http_offsets);
961 867
962 const size_t omit_http_start_with_ftp_offsets[] = { 868 const size_t omit_http_start_with_ftp_offsets[] = {
963 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21 869 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
964 }; 870 };
965 CheckAdjustedOffsets("http://ftp.google.com/", "en", kFormatUrlOmitHTTP, 871 CheckAdjustedOffsets("http://ftp.google.com/", kFormatUrlOmitHTTP,
966 net::UnescapeRule::NORMAL, 872 net::UnescapeRule::NORMAL,
967 omit_http_start_with_ftp_offsets); 873 omit_http_start_with_ftp_offsets);
968 874
969 const size_t omit_all_offsets[] = { 875 const size_t omit_all_offsets[] = {
970 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, kNpos, kNpos, kNpos, kNpos, 876 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, kNpos, kNpos, kNpos, kNpos,
971 0, 1, 2, 3, 4, 5, 6, 7 877 0, 1, 2, 3, 4, 5, 6, 7
972 }; 878 };
973 CheckAdjustedOffsets("http://user@foo.com/", "en", kFormatUrlOmitAll, 879 CheckAdjustedOffsets("http://user@foo.com/", kFormatUrlOmitAll,
974 net::UnescapeRule::NORMAL, omit_all_offsets); 880 net::UnescapeRule::NORMAL, omit_all_offsets);
975 } 881 }
976 882
977 } // namespace 883 } // namespace
978 884
979 } // namespace url_formatter 885 } // namespace url_formatter
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698