OLD | NEW |
---|---|
1 // Copyright 2015 The Chromium Authors. All rights reserved. | 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "components/url_formatter/url_formatter.h" | 5 #include "components/url_formatter/url_formatter.h" |
6 | 6 |
7 #include <stddef.h> | 7 #include <stddef.h> |
8 #include <string.h> | 8 #include <string.h> |
9 | 9 |
10 #include <vector> | 10 #include <vector> |
(...skipping 257 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
268 L"\x0455\x0441\x043e\x0440\x0435\u00b7\x0440\x0430\x0443.\x0440\x0444", | 268 L"\x0455\x0441\x043e\x0440\x0435\u00b7\x0440\x0430\x0443.\x0440\x0444", |
269 true}, | 269 true}, |
270 | 270 |
271 // ѕсоре-рау.한국 with ѕсоре and рау in Cyrillic. | 271 // ѕсоре-рау.한국 with ѕсоре and рау in Cyrillic. |
272 {"xn----8sbn9akccw8m.xn--3e0b707e", | 272 {"xn----8sbn9akccw8m.xn--3e0b707e", |
273 L"\x0455\x0441\x043e\x0440\x0435-\x0440\x0430\x0443.\xd55c\xad6d", true}, | 273 L"\x0455\x0441\x043e\x0440\x0435-\x0440\x0430\x0443.\xd55c\xad6d", true}, |
274 | 274 |
275 // музей (museum in Russian) has characters without a Latin-look-alike. | 275 // музей (museum in Russian) has characters without a Latin-look-alike. |
276 {"xn--e1adhj9a.com", L"\x043c\x0443\x0437\x0435\x0439.com", true}, | 276 {"xn--e1adhj9a.com", L"\x043c\x0443\x0437\x0435\x0439.com", true}, |
277 | 277 |
278 // Combining Diacritic marks after a script other than Latin-Greek-Cyrillic | |
279 {"xn--rsa2568fvxya.com", L"\xd55c\x0301\xae00.com", false}, // 한́글.com | |
280 {"xn--rsa0336bjom.com", L"\x6f22\x0307\x5b57.com", false}, // 漢̇字.com | |
281 // नागरी́.com | |
282 {"xn--lsa922apb7a6do.com", L"\x0928\x093e\x0917\x0930\x0940\x0301.com", | |
283 false}, | |
284 | |
285 // Similarity checks against the list of top domains. "digklmo68.com" and | |
286 // "digklmo68.co.uk" are listed for unit tests in the top domain list. | |
Peter Kasting
2017/05/09 01:37:04
Can we avoid including these test domains in the r
| |
287 {"xn--igklmo68-nea32c.com", L"\x0111igklmo68.com", false}, // đigklmo68.com | |
288 {"www.xn--igklmo68-nea32c.com", L"www.\x0111igklmo68.com", false}, | |
289 {"foo.bar.xn--igklmo68-nea32c.com", L"foo.bar.\x0111igklmo68.com", false}, | |
290 {"xn--igklmo68-nea32c.co.uk", L"\x0111igklmo68.co.uk", false}, | |
291 {"mail.xn--igklmo68-nea32c.co.uk", L"mail.\x0111igklmo68.co.uk", false}, | |
292 {"xn--digklmo68-6jf.com", L"di\x0307gklmo68.com", false}, // di̇gklmo68.com | |
293 {"xn--digklmo68-7vf.com", L"dig\x0331klmo68.com", false}, // dig̱klmo68.com | |
294 {"xn--diglmo68-omb.com", L"dig\x0138lmo68.com", false}, // digĸlmo68.com | |
295 {"xn--digkmo68-9ob.com", L"digk\x0142mo68.com", false}, // digkłmo68.com | |
296 {"xn--digklo68-l89c.com", L"digkl\x1e43o68.com", false}, // digklṃo68.com | |
297 {"xn--digklm68-b5a.com", L"digklm\x00f8" L"68.com", false}, // digklmø68.com | |
298 {"xn--digklmo8-h7g.com", L"digklmo\x0431" L"8.com", false}, // digklmoб8.com | |
299 {"xn--digklmo6-7yr.com", L"digklmo6\x09ea.com", false}, // digklmo6৪.com | |
300 | |
301 // 'islkpx123.com' is listed for unit tests in the top domain list. | |
302 // 'іѕӏкрх123' can look like 'islkpx123' in some fonts. | |
303 {"xn--123-bed4a4a6hh40i.com", | |
304 L"\x0456\x0455\x04cf\x043a\x0440\x0445" L"123.com", false}, | |
305 | |
278 // Mixed digits: the first two will also fail mixed script test | 306 // Mixed digits: the first two will also fail mixed script test |
279 // Latin + ASCII digit + Deva digit | 307 // Latin + ASCII digit + Deva digit |
280 {"xn--asc1deva-j0q.co.in", L"asc1deva\x0967.co.in", false}, | 308 {"xn--asc1deva-j0q.co.in", L"asc1deva\x0967.co.in", false}, |
281 // Latin + Deva digit + Beng digit | 309 // Latin + Deva digit + Beng digit |
282 {"xn--devabeng-f0qu3f.co.in", L"deva\x0967" L"beng\x09e7.co.in", false}, | 310 {"xn--devabeng-f0qu3f.co.in", L"deva\x0967" L"beng\x09e7.co.in", false}, |
283 // ASCII digit + Beng digit | 311 // ASCII digit + Beng digit |
284 {"xn--79-v5f.co.in", L"7\x09ea" L"9.co.in", false}, | 312 {"xn--79-v5f.co.in", L"7\x09ea" L"9.co.in", false}, |
285 // Deva digit + Beng digit | 313 // Deva digit + Beng digit |
286 {"xn--e4b0x.co.in", L"\x0967\x09e7.co.in", false}, | 314 {"xn--e4b0x.co.in", L"\x0967\x09e7.co.in", false}, |
287 // U+4E00 (CJK Ideograph One) is not a digit | 315 // U+4E00 (CJK Ideograph One) is not a digit |
(...skipping 13 matching lines...) Expand all Loading... | |
301 {"xn--58db0a9q.com", L"\x13df\x13aa\x13a1\x13a0.com", false}, | 329 {"xn--58db0a9q.com", L"\x13df\x13aa\x13a1\x13a0.com", false}, |
302 // Scripts excluded from Identifiers: UTS 31 Table 4 | 330 // Scripts excluded from Identifiers: UTS 31 Table 4 |
303 // Coptic | 331 // Coptic |
304 {"xn--5ya.com", L"\x03e7.com", false}, | 332 {"xn--5ya.com", L"\x03e7.com", false}, |
305 // Old Italic | 333 // Old Italic |
306 {"xn--097cc.com", L"\U00010300\U00010301.com", false}, | 334 {"xn--097cc.com", L"\U00010300\U00010301.com", false}, |
307 | 335 |
308 // U+115F (Hangul Filler) | 336 // U+115F (Hangul Filler) |
309 {"xn--osd3820f24c.kr", L"\xac00\xb098\x115f.kr", false}, | 337 {"xn--osd3820f24c.kr", L"\xac00\xb098\x115f.kr", false}, |
310 {"www.xn--google-ho0coa.com", L"www.\x2039google\x203a.com", false}, | 338 {"www.xn--google-ho0coa.com", L"www.\x2039google\x203a.com", false}, |
311 // Latin small capital w | 339 // Latin small capital w: hardᴡare.com |
312 {"xn--hardare-l41c.com", L"hard\x1d21" L"are.com", false}, | 340 {"xn--hardare-l41c.com", L"hard\x1d21" L"are.com", false}, |
313 // Minus Sign(U+2212) | 341 // Minus Sign(U+2212) |
314 {"xn--t9g238xc2a.jp", L"\x65e5\x2212\x672c.jp", false}, | 342 {"xn--t9g238xc2a.jp", L"\x65e5\x2212\x672c.jp", false}, |
315 // Latin Small Letter Script G | 343 // Latin Small Letter Script G: ɡɡ.com |
316 {"xn--0naa.com", L"\x0261\x0261.com", false}, | 344 {"xn--0naa.com", L"\x0261\x0261.com", false}, |
317 // Hangul Jamo(U+11xx) | 345 // Hangul Jamo(U+11xx) |
318 {"xn--0pdc3b.com", L"\x1102\x1103\x1110.com", false}, | 346 {"xn--0pdc3b.com", L"\x1102\x1103\x1110.com", false}, |
319 // degree sign | 347 // degree sign: 36°c.com |
320 {"xn--36c-tfa.com", L"36\x00b0" L"c.com", false}, | 348 {"xn--36c-tfa.com", L"36\x00b0" L"c.com", false}, |
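321 // Pound sign intended, but U+00A8 is the diaeresis (pound sign is U+00A3) | 349 // Pound sign intended, but U+00A8 is the diaeresis (pound sign is U+00A3) |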
322 {"xn--5free-9ga.com", L"5free\x00a8.com", false}, | 350 {"xn--5free-9ga.com", L"5free\x00a8.com", false}, |
323 // Hebrew points (U+05B0, U+05B6) | 351 // Hebrew points (U+05B0, U+05B6) |
324 {"xn--7cbl2kc2a.com", L"\x05e1\x05b6\x05e7\x05b0\x05e1.com", false}, | 352 {"xn--7cbl2kc2a.com", L"\x05e1\x05b6\x05e7\x05b0\x05e1.com", false}, |
325 // Danda(U+0964) | 353 // Danda(U+0964) |
326 {"xn--81bp1b6ch8s.com", L"\x0924\x093f\x091c\x0964\x0930\x0940.com", false}, | 354 {"xn--81bp1b6ch8s.com", L"\x0924\x093f\x091c\x0964\x0930\x0940.com", false}, |
327 // Small letter script G(U+0261) | 355 // Small letter script G(U+0261) |
328 {"xn--oogle-qmc.com", L"\x0261oogle.com", false}, | 356 {"xn--oogle-qmc.com", L"\x0261oogle.com", false}, |
329 // Small Katakana Extension(U+31F1) | 357 // Small Katakana Extension(U+31F1) |
330 {"xn--wlk.com", L"\x31f1.com", false}, | 358 {"xn--wlk.com", L"\x31f1.com", false}, |
331 // Heart symbol | 359 // Heart symbol: ♥ |
332 {"xn--ab-u0x.com", L"ab\x2665.com", false}, | 360 {"xn--ab-u0x.com", L"ab\x2665.com", false}, |
333 // Emoji | 361 // Emoji |
334 {"xn--vi8hiv.xyz", L"\U0001f355\U0001f4a9.xyz", false}, | 362 {"xn--vi8hiv.xyz", L"\U0001f355\U0001f4a9.xyz", false}, |
335 // Registered trade mark | 363 // Registered trade mark |
336 {"xn--egistered-fna.com", L"\x00ae" L"egistered.com", false}, | 364 {"xn--egistered-fna.com", L"\x00ae" L"egistered.com", false}, |
337 // Latin Letter Retroflex Click | 365 // Latin Letter Retroflex Click |
338 {"xn--registered-25c.com", L"registered\x01c3.com", false}, | 366 {"xn--registered-25c.com", L"registered\x01c3.com", false}, |
339 // ASCII '!' not allowed in IDN | 367 // ASCII '!' not allowed in IDN |
340 {"xn--!-257eu42c.kr", L"\xc548\xb155!.kr", false}, | 368 {"xn--!-257eu42c.kr", L"\xc548\xb155!.kr", false}, |
341 // 'GOOGLE' in IPA extension | 369 // 'GOOGLE' in IPA extension: ɢᴏᴏɢʟᴇ |
342 {"xn--1naa7pn51hcbaa.com", | 370 {"xn--1naa7pn51hcbaa.com", |
343 L"\x0262\x1d0f\x1d0f\x0262\x029f\x1d07.com", false}, | 371 L"\x0262\x1d0f\x1d0f\x0262\x029f\x1d07.com", false}, |
344 // Padlock icon spoof. | 372 // Padlock icon spoof. |
345 {"xn--google-hj64e", L"\U0001f512google.com", false}, | 373 {"xn--google-hj64e", L"\U0001f512google.com", false}, |
346 | 374 |
347 // Custom black list | 375 // Custom black list |
348 // Combining Long Solidus Overlay | 376 // Combining Long Solidus Overlay |
349 {"google.xn--comabc-k8d", L"google.com\x0338" L"abc", false}, | 377 {"google.xn--comabc-k8d", L"google.com\x0338" L"abc", false}, |
350 // Hyphenation Point instead of Katakana Middle dot | 378 // Hyphenation Point instead of Katakana Middle dot |
351 {"xn--svgy16dha.jp", L"\x30a1\x2027\x30a3.jp", false}, | 379 {"xn--svgy16dha.jp", L"\x30a1\x2027\x30a3.jp", false}, |
(...skipping 635 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
987 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, kNpos, kNpos, kNpos, kNpos, | 1015 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, kNpos, kNpos, kNpos, kNpos, |
988 0, 1, 2, 3, 4, 5, 6, 7 | 1016 0, 1, 2, 3, 4, 5, 6, 7 |
989 }; | 1017 }; |
990 CheckAdjustedOffsets("http://user@foo.com/", kFormatUrlOmitAll, | 1018 CheckAdjustedOffsets("http://user@foo.com/", kFormatUrlOmitAll, |
991 net::UnescapeRule::NORMAL, omit_all_offsets); | 1019 net::UnescapeRule::NORMAL, omit_all_offsets); |
992 } | 1020 } |
993 | 1021 |
994 } // namespace | 1022 } // namespace |
995 | 1023 |
996 } // namespace url_formatter | 1024 } // namespace url_formatter |
OLD | NEW |