Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(182)

Side by Side Diff: components/url_formatter/url_formatter_unittest.cc

Issue 2683793010: Block domain labels made of Cyrillic letters that look alike Latin (Closed)
Patch Set: go back to ps11 Created 3 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « components/url_formatter/url_formatter.cc ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2015 The Chromium Authors. All rights reserved. 1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/url_formatter/url_formatter.h" 5 #include "components/url_formatter/url_formatter.h"
6 6
7 #include <stddef.h> 7 #include <stddef.h>
8 #include <string.h> 8 #include <string.h>
9 9
10 #include <vector> 10 #include <vector>
(...skipping 14 matching lines...) Expand all
25 using base::ASCIIToUTF16; 25 using base::ASCIIToUTF16;
26 26
27 const size_t kNpos = base::string16::npos; 27 const size_t kNpos = base::string16::npos;
28 28
29 struct IDNTestCase { 29 struct IDNTestCase {
30 const char* const input; 30 const char* const input;
31 const wchar_t* unicode_output; 31 const wchar_t* unicode_output;
32 const bool unicode_allowed; 32 const bool unicode_allowed;
33 }; 33 };
34 34
35 // TODO(jshin): Replace L"..." with "..." in UTF-8 when it's easier to read.
35 const IDNTestCase idn_cases[] = { 36 const IDNTestCase idn_cases[] = {
36 // No IDN 37 // No IDN
37 {"www.google.com", L"www.google.com", true}, 38 {"www.google.com", L"www.google.com", true},
38 {"www.google.com.", L"www.google.com.", true}, 39 {"www.google.com.", L"www.google.com.", true},
39 {".", L".", true}, 40 {".", L".", true},
40 {"", L"", true}, 41 {"", L"", true},
41 // IDN 42 // IDN
42 // Hanzi (Traditional Chinese) 43 // Hanzi (Traditional Chinese)
43 {"xn--1lq90ic7f1rc.cn", L"\x5317\x4eac\x5927\x5b78.cn", true}, 44 {"xn--1lq90ic7f1rc.cn", L"\x5317\x4eac\x5927\x5b78.cn", true},
44 // Hanzi ('video' in Simplified Chinese) 45 // Hanzi ('video' in Simplified Chinese)
(...skipping 157 matching lines...) Expand 10 before | Expand all | Expand 10 after
202 {"xn--weka801xo02a.com", L"\x30fc\x52d5\x753b\x30fc.com", false}, 203 {"xn--weka801xo02a.com", L"\x30fc\x52d5\x753b\x30fc.com", false},
203 // Han + U+30FC + Han 204 // Han + U+30FC + Han
204 {"xn--wekz60nb2ay85atj0b.jp", L"\x65e5\x672c\x30fc\x91ce\x7403.jp", false}, 205 {"xn--wekz60nb2ay85atj0b.jp", L"\x65e5\x672c\x30fc\x91ce\x7403.jp", false},
205 // Latin + U+30FC + Latin 206 // Latin + U+30FC + Latin
206 {"xn--abcdef-r64e.jp", L"abc\x30fc" L"def.jp", false}, 207 {"xn--abcdef-r64e.jp", L"abc\x30fc" L"def.jp", false},
207 // Latin + U+30FB + Latin 208 // Latin + U+30FB + Latin
208 {"xn--abcdef-k64e.jp", L"abc\x30fb" L"def.jp", false}, 209 {"xn--abcdef-k64e.jp", L"abc\x30fb" L"def.jp", false},
209 // U+30FB + Latin 210 // U+30FB + Latin
210 {"xn--abc-os4b.jp", L"\x30fb" L"abc.jp", false}, 211 {"xn--abc-os4b.jp", L"\x30fb" L"abc.jp", false},
211 212
213 // Cyrillic labels made of Latin-look-alike Cyrillic letters.
214 // ѕсоре.com with ѕсоре in Cyrillic.
215 {"xn--e1argc3h.com", L"\x0455\x0441\x043e\x0440\x0435.com", false},
216 // ѕсоре123.com with ѕсоре in Cyrillic.
217 {"xn--123-qdd8bmf3n.com",
218 L"\x0455\x0441\x043e\x0440\x0435" L"123.com", false},
219 // ѕсоре-рау.com with ѕсоре and рау in Cyrillic.
220 {"xn----8sbn9akccw8m.com",
221 L"\x0455\x0441\x043e\x0440\x0435-\x0440\x0430\x0443.com", false},
222 // ѕсоре·рау.com with ѕсоре and рау in Cyrillic and U+00B7 (middle dot) between them.
223 {"xn--uba29ona9akccw8m.com",
224 L"\x0455\x0441\x043e\x0440\x0435\u00b7\x0440\x0430\x0443.com", false},
225
226 // Three of the cases above (ѕсоре, ѕсоре123 and ѕсоре·рау), but in an IDN TLD.
227 {"xn--e1argc3h.xn--p1ai",
228 L"\x0455\x0441\x043e\x0440\x0435.\x0440\x0444", true},
229 {"xn--123-qdd8bmf3n.xn--p1ai",
230 L"\x0455\x0441\x043e\x0440\x0435" L"123.\x0440\x0444", true},
231 {"xn--uba29ona9akccw8m.xn--p1ai",
232 L"\x0455\x0441\x043e\x0440\x0435\u00b7\x0440\x0430\x0443.\x0440\x0444",
233 true},
234
235 // ѕсоре-рау.한국 with ѕсоре and рау in Cyrillic.
236 {"xn----8sbn9akccw8m.xn--3e0b707e",
237 L"\x0455\x0441\x043e\x0440\x0435-\x0440\x0430\x0443.\xd55c\xad6d", true},
238
239 // музей (museum in Russian) has characters without a Latin-look-alike.
240 {"xn--e1adhj9a.com", L"\x043c\x0443\x0437\x0435\x0439.com", true},
241
212 // Mixed digits: the first two will also fail mixed script test 242 // Mixed digits: the first two will also fail mixed script test
213 // Latin + ASCII digit + Deva digit 243 // Latin + ASCII digit + Deva digit
214 {"xn--asc1deva-j0q.co.in", L"asc1deva\x0967.co.in", false}, 244 {"xn--asc1deva-j0q.co.in", L"asc1deva\x0967.co.in", false},
215 // Latin + Deva digit + Beng digit 245 // Latin + Deva digit + Beng digit
216 {"xn--devabeng-f0qu3f.co.in", L"deva\x0967" L"beng\x09e7.co.in", false}, 246 {"xn--devabeng-f0qu3f.co.in", L"deva\x0967" L"beng\x09e7.co.in", false},
217 // ASCII digit + Deva digit 247 // ASCII digit + Deva digit
218 {"xn--79-v5f.co.in", L"7\x09ea" L"9.co.in", false}, 248 {"xn--79-v5f.co.in", L"7\x09ea" L"9.co.in", false},
219 // Deva digit + Beng digit 249 // Deva digit + Beng digit
220 {"xn--e4b0x.co.in", L"\x0967\x09e7.co.in", false}, 250 {"xn--e4b0x.co.in", L"\x0967\x09e7.co.in", false},
221 // U+4E00 (CJK Ideograph One) is not a digit 251 // U+4E00 (CJK Ideograph One) is not a digit
(...skipping 699 matching lines...) Expand 10 before | Expand all | Expand 10 after
921 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, kNpos, kNpos, kNpos, kNpos, 951 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, kNpos, kNpos, kNpos, kNpos,
922 0, 1, 2, 3, 4, 5, 6, 7 952 0, 1, 2, 3, 4, 5, 6, 7
923 }; 953 };
924 CheckAdjustedOffsets("http://user@foo.com/", kFormatUrlOmitAll, 954 CheckAdjustedOffsets("http://user@foo.com/", kFormatUrlOmitAll,
925 net::UnescapeRule::NORMAL, omit_all_offsets); 955 net::UnescapeRule::NORMAL, omit_all_offsets);
926 } 956 }
927 957
928 } // namespace 958 } // namespace
929 959
930 } // namespace url_formatter 960 } // namespace url_formatter
OLDNEW
« no previous file with comments | « components/url_formatter/url_formatter.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698