Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(131)

Side by Side Diff: components/url_formatter/url_formatter_unittest.cc

Issue 2784933002: Mitigate spoofing attempt using Latin letters. (Closed)
Patch Set: add back U+04CF (ӏ) -> 'l' map Created 3 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2015 The Chromium Authors. All rights reserved. 1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/url_formatter/url_formatter.h" 5 #include "components/url_formatter/url_formatter.h"
6 6
7 #include <stddef.h> 7 #include <stddef.h>
8 #include <string.h> 8 #include <string.h>
9 9
10 #include <vector> 10 #include <vector>
(...skipping 257 matching lines...) Expand 10 before | Expand all | Expand 10 after
268 L"\x0455\x0441\x043e\x0440\x0435\u00b7\x0440\x0430\x0443.\x0440\x0444", 268 L"\x0455\x0441\x043e\x0440\x0435\u00b7\x0440\x0430\x0443.\x0440\x0444",
269 true}, 269 true},
270 270
271 // ѕсоре-рау.한국 with ѕсоре and рау in Cyrillic. 271 // ѕсоре-рау.한국 with ѕсоре and рау in Cyrillic.
272 {"xn----8sbn9akccw8m.xn--3e0b707e", 272 {"xn----8sbn9akccw8m.xn--3e0b707e",
273 L"\x0455\x0441\x043e\x0440\x0435-\x0440\x0430\x0443.\xd55c\xad6d", true}, 273 L"\x0455\x0441\x043e\x0440\x0435-\x0440\x0430\x0443.\xd55c\xad6d", true},
274 274
275 // музей (museum in Russian) has characters without a Latin-look-alike. 275 // музей (museum in Russian) has characters without a Latin-look-alike.
276 {"xn--e1adhj9a.com", L"\x043c\x0443\x0437\x0435\x0439.com", true}, 276 {"xn--e1adhj9a.com", L"\x043c\x0443\x0437\x0435\x0439.com", true},
277 277
278 // Combining Diacritic marks after a script other than Latin-Greek-Cyrillic
279 {"xn--rsa2568fvxya.com", L"\xd55c\x0301\xae00.com", false}, // 한́글.com
280 {"xn--rsa0336bjom.com", L"\x6f22\x0307\x5b57.com", false}, // 漢̇字.com
281 // नागरी́.com
282 {"xn--lsa922apb7a6do.com", L"\x0928\x093e\x0917\x0930\x0940\x0301.com",
283 false},
284
285 // Similarity checks against the list of top domains. "digklmo68.com" and
286 // "digklmo68.co.uk" are listed for unittest in the top domain list.
Peter Kasting 2017/05/09 01:37:04 Can we avoid including these test domains in the r
287 {"xn--igklmo68-nea32c.com", L"\x0111igklmo68.com", false}, // đigklmo68.com
288 {"www.xn--igklmo68-nea32c.com", L"www.\x0111igklmo68.com", false},
289 {"foo.bar.xn--igklmo68-nea32c.com", L"foo.bar.\x0111igklmo68.com", false},
290 {"xn--igklmo68-nea32c.co.uk", L"\x0111igklmo68.co.uk", false},
291 {"mail.xn--igklmo68-nea32c.co.uk", L"mail.\x0111igklmo68.co.uk", false},
292 {"xn--digklmo68-6jf.com", L"di\x0307gklmo68.com", false}, // di̇gklmo68.com
293 {"xn--digklmo68-7vf.com", L"dig\x0331klmo68.com", false}, // dig̱klmo68.com
294 {"xn--diglmo68-omb.com", L"dig\x0138lmo68.com", false}, // digĸlmo68.com
295 {"xn--digkmo68-9ob.com", L"digk\x0142mo68.com", false}, // digkłmo68.com
296 {"xn--digklo68-l89c.com", L"digkl\x1e43o68.com", false}, // digklṃo68.com
297 {"xn--digklm68-b5a.com", L"digklm\x00f8" L"68.com", false}, // digklmø68.com
298 {"xn--digklmo8-h7g.com", L"digklmo\x0431" L"8.com", false}, // digklmoб8.com
299 {"xn--digklmo6-7yr.com", L"digklmo6\x09ea.com", false}, // digklmo6৪.com
300
301 // 'islkpx123.com' is listed for unittest in the top domain list.
302 // 'іѕӏкрх123' can look like 'islkpx123' in some fonts.
303 {"xn--123-bed4a4a6hh40i.com",
304 L"\x0456\x0455\x04cf\x043a\x0440\x0445" L"123.com", false},
305
278 // Mixed digits: the first two will also fail mixed script test 306 // Mixed digits: the first two will also fail mixed script test
279 // Latin + ASCII digit + Deva digit 307 // Latin + ASCII digit + Deva digit
280 {"xn--asc1deva-j0q.co.in", L"asc1deva\x0967.co.in", false}, 308 {"xn--asc1deva-j0q.co.in", L"asc1deva\x0967.co.in", false},
281 // Latin + Deva digit + Beng digit 309 // Latin + Deva digit + Beng digit
282 {"xn--devabeng-f0qu3f.co.in", L"deva\x0967" L"beng\x09e7.co.in", false}, 310 {"xn--devabeng-f0qu3f.co.in", L"deva\x0967" L"beng\x09e7.co.in", false},
283 // ASCII digit + Beng digit 311 // ASCII digit + Beng digit
284 {"xn--79-v5f.co.in", L"7\x09ea" L"9.co.in", false}, 312 {"xn--79-v5f.co.in", L"7\x09ea" L"9.co.in", false},
285 // Deva digit + Beng digit 313 // Deva digit + Beng digit
286 {"xn--e4b0x.co.in", L"\x0967\x09e7.co.in", false}, 314 {"xn--e4b0x.co.in", L"\x0967\x09e7.co.in", false},
287 // U+4E00 (CJK Ideograph One) is not a digit 315 // U+4E00 (CJK Ideograph One) is not a digit
(...skipping 13 matching lines...) Expand all
301 {"xn--58db0a9q.com", L"\x13df\x13aa\x13a1\x13a0.com", false}, 329 {"xn--58db0a9q.com", L"\x13df\x13aa\x13a1\x13a0.com", false},
302 // Scripts excluded from Identifiers: UTS 31 Table 4 330 // Scripts excluded from Identifiers: UTS 31 Table 4
303 // Coptic 331 // Coptic
304 {"xn--5ya.com", L"\x03e7.com", false}, 332 {"xn--5ya.com", L"\x03e7.com", false},
305 // Old Italic 333 // Old Italic
306 {"xn--097cc.com", L"\U00010300\U00010301.com", false}, 334 {"xn--097cc.com", L"\U00010300\U00010301.com", false},
307 335
308 // U+115F (Hangul Filler) 336 // U+115F (Hangul Filler)
309 {"xn--osd3820f24c.kr", L"\xac00\xb098\x115f.kr", false}, 337 {"xn--osd3820f24c.kr", L"\xac00\xb098\x115f.kr", false},
310 {"www.xn--google-ho0coa.com", L"www.\x2039google\x203a.com", false}, 338 {"www.xn--google-ho0coa.com", L"www.\x2039google\x203a.com", false},
311 // Latin small capital w 339 // Latin small capital w: hardᴡare.com
312 {"xn--hardare-l41c.com", L"hard\x1d21" L"are.com", false}, 340 {"xn--hardare-l41c.com", L"hard\x1d21" L"are.com", false},
313 // Minus Sign(U+2212) 341 // Minus Sign(U+2212)
314 {"xn--t9g238xc2a.jp", L"\x65e5\x2212\x672c.jp", false}, 342 {"xn--t9g238xc2a.jp", L"\x65e5\x2212\x672c.jp", false},
315 // Latin Small Letter Script G 343 // Latin Small Letter Script G: ɡɡ.com
316 {"xn--0naa.com", L"\x0261\x0261.com", false}, 344 {"xn--0naa.com", L"\x0261\x0261.com", false},
317 // Hangul Jamo(U+11xx) 345 // Hangul Jamo(U+11xx)
318 {"xn--0pdc3b.com", L"\x1102\x1103\x1110.com", false}, 346 {"xn--0pdc3b.com", L"\x1102\x1103\x1110.com", false},
319 // degree sign 347 // degree sign: 36°c.com
320 {"xn--36c-tfa.com", L"36\x00b0" L"c.com", false}, 348 {"xn--36c-tfa.com", L"36\x00b0" L"c.com", false},
321 // Pound sign 349 // Pound sign
322 {"xn--5free-9ga.com", L"5free\x00a8.com", false}, 350 {"xn--5free-9ga.com", L"5free\x00a8.com", false},
323 // Hebrew points (U+05B0, U+05B6) 351 // Hebrew points (U+05B0, U+05B6)
324 {"xn--7cbl2kc2a.com", L"\x05e1\x05b6\x05e7\x05b0\x05e1.com", false}, 352 {"xn--7cbl2kc2a.com", L"\x05e1\x05b6\x05e7\x05b0\x05e1.com", false},
325 // Danda(U+0964) 353 // Danda(U+0964)
326 {"xn--81bp1b6ch8s.com", L"\x0924\x093f\x091c\x0964\x0930\x0940.com", false}, 354 {"xn--81bp1b6ch8s.com", L"\x0924\x093f\x091c\x0964\x0930\x0940.com", false},
327 // Small letter script G(U+0261) 355 // Small letter script G(U+0261)
328 {"xn--oogle-qmc.com", L"\x0261oogle.com", false}, 356 {"xn--oogle-qmc.com", L"\x0261oogle.com", false},
329 // Small Katakana Extension(U+31F1) 357 // Small Katakana Extension(U+31F1)
330 {"xn--wlk.com", L"\x31f1.com", false}, 358 {"xn--wlk.com", L"\x31f1.com", false},
331 // Heart symbol 359 // Heart symbol: ♥
332 {"xn--ab-u0x.com", L"ab\x2665.com", false}, 360 {"xn--ab-u0x.com", L"ab\x2665.com", false},
333 // Emoji 361 // Emoji
334 {"xn--vi8hiv.xyz", L"\U0001f355\U0001f4a9.xyz", false}, 362 {"xn--vi8hiv.xyz", L"\U0001f355\U0001f4a9.xyz", false},
335 // Registered trade mark 363 // Registered trade mark
336 {"xn--egistered-fna.com", L"\x00ae" L"egistered.com", false}, 364 {"xn--egistered-fna.com", L"\x00ae" L"egistered.com", false},
337 // Latin Letter Retroflex Click 365 // Latin Letter Retroflex Click
338 {"xn--registered-25c.com", L"registered\x01c3.com", false}, 366 {"xn--registered-25c.com", L"registered\x01c3.com", false},
339 // ASCII '!' not allowed in IDN 367 // ASCII '!' not allowed in IDN
340 {"xn--!-257eu42c.kr", L"\xc548\xb155!.kr", false}, 368 {"xn--!-257eu42c.kr", L"\xc548\xb155!.kr", false},
341 // 'GOOGLE' in IPA extension 369 // 'GOOGLE' in IPA extension: ɢᴏᴏɢʟᴇ
342 {"xn--1naa7pn51hcbaa.com", 370 {"xn--1naa7pn51hcbaa.com",
343 L"\x0262\x1d0f\x1d0f\x0262\x029f\x1d07.com", false}, 371 L"\x0262\x1d0f\x1d0f\x0262\x029f\x1d07.com", false},
344 // Padlock icon spoof. 372 // Padlock icon spoof.
345 {"xn--google-hj64e", L"\U0001f512google.com", false}, 373 {"xn--google-hj64e", L"\U0001f512google.com", false},
346 374
347 // Custom black list 375 // Custom black list
348 // Combining Long Solidus Overlay 376 // Combining Long Solidus Overlay
349 {"google.xn--comabc-k8d", L"google.com\x0338" L"abc", false}, 377 {"google.xn--comabc-k8d", L"google.com\x0338" L"abc", false},
350 // Hyphenation Point instead of Katakana Middle dot 378 // Hyphenation Point instead of Katakana Middle dot
351 {"xn--svgy16dha.jp", L"\x30a1\x2027\x30a3.jp", false}, 379 {"xn--svgy16dha.jp", L"\x30a1\x2027\x30a3.jp", false},
(...skipping 635 matching lines...) Expand 10 before | Expand all | Expand 10 after
987 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, kNpos, kNpos, kNpos, kNpos, 1015 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, kNpos, kNpos, kNpos, kNpos,
988 0, 1, 2, 3, 4, 5, 6, 7 1016 0, 1, 2, 3, 4, 5, 6, 7
989 }; 1017 };
990 CheckAdjustedOffsets("http://user@foo.com/", kFormatUrlOmitAll, 1018 CheckAdjustedOffsets("http://user@foo.com/", kFormatUrlOmitAll,
991 net::UnescapeRule::NORMAL, omit_all_offsets); 1019 net::UnescapeRule::NORMAL, omit_all_offsets);
992 } 1020 }
993 1021
994 } // namespace 1022 } // namespace
995 1023
996 } // namespace url_formatter 1024 } // namespace url_formatter
OLDNEW
« components/url_formatter/url_formatter.cc ('K') | « components/url_formatter/url_formatter.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698