| OLD | NEW |
| 1 // Copyright 2015 The Chromium Authors. All rights reserved. | 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "components/url_formatter/url_formatter.h" | 5 #include "components/url_formatter/url_formatter.h" |
| 6 | 6 |
| 7 #include <stddef.h> | 7 #include <stddef.h> |
| 8 #include <string.h> | 8 #include <string.h> |
| 9 | 9 |
| 10 #include <vector> | 10 #include <vector> |
| (...skipping 14 matching lines...) Expand all Loading... |
| 25 using base::ASCIIToUTF16; | 25 using base::ASCIIToUTF16; |
| 26 | 26 |
| 27 const size_t kNpos = base::string16::npos; | 27 const size_t kNpos = base::string16::npos; |
| 28 | 28 |
| 29 struct IDNTestCase { | 29 struct IDNTestCase { |
| 30 const char* const input; | 30 const char* const input; |
| 31 const wchar_t* unicode_output; | 31 const wchar_t* unicode_output; |
| 32 const bool unicode_allowed; | 32 const bool unicode_allowed; |
| 33 }; | 33 }; |
| 34 | 34 |
| 35 // TODO(jshin): Replace L"..." with "..." in UTF-8 when it's easier to read. |
| 35 const IDNTestCase idn_cases[] = { | 36 const IDNTestCase idn_cases[] = { |
| 36 // No IDN | 37 // No IDN |
| 37 {"www.google.com", L"www.google.com", true}, | 38 {"www.google.com", L"www.google.com", true}, |
| 38 {"www.google.com.", L"www.google.com.", true}, | 39 {"www.google.com.", L"www.google.com.", true}, |
| 39 {".", L".", true}, | 40 {".", L".", true}, |
| 40 {"", L"", true}, | 41 {"", L"", true}, |
| 41 // IDN | 42 // IDN |
| 42 // Hanzi (Traditional Chinese) | 43 // Hanzi (Traditional Chinese) |
| 43 {"xn--1lq90ic7f1rc.cn", L"\x5317\x4eac\x5927\x5b78.cn", true}, | 44 {"xn--1lq90ic7f1rc.cn", L"\x5317\x4eac\x5927\x5b78.cn", true}, |
| 44 // Hanzi ('video' in Simplified Chinese) | 45 // Hanzi ('video' in Simplified Chinese) |
| (...skipping 157 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 202 {"xn--weka801xo02a.com", L"\x30fc\x52d5\x753b\x30fc.com", false}, | 203 {"xn--weka801xo02a.com", L"\x30fc\x52d5\x753b\x30fc.com", false}, |
| 203 // Han + U+30FC + Han | 204 // Han + U+30FC + Han |
| 204 {"xn--wekz60nb2ay85atj0b.jp", L"\x65e5\x672c\x30fc\x91ce\x7403.jp", false}, | 205 {"xn--wekz60nb2ay85atj0b.jp", L"\x65e5\x672c\x30fc\x91ce\x7403.jp", false}, |
| 205 // Latin + U+30FC + Latin | 206 // Latin + U+30FC + Latin |
| 206 {"xn--abcdef-r64e.jp", L"abc\x30fc" L"def.jp", false}, | 207 {"xn--abcdef-r64e.jp", L"abc\x30fc" L"def.jp", false}, |
| 207 // Latin + U+30FB + Latin | 208 // Latin + U+30FB + Latin |
| 208 {"xn--abcdef-k64e.jp", L"abc\x30fb" L"def.jp", false}, | 209 {"xn--abcdef-k64e.jp", L"abc\x30fb" L"def.jp", false}, |
| 209 // U+30FB + Latin | 210 // U+30FB + Latin |
| 210 {"xn--abc-os4b.jp", L"\x30fb" L"abc.jp", false}, | 211 {"xn--abc-os4b.jp", L"\x30fb" L"abc.jp", false}, |
| 211 | 212 |
| 213 // Cyrillic labels made of Latin-look-alike Cyrillic letters. |
| 214 // ѕсоре.com with ѕсоре in Cyrillic |
| 215 {"xn--e1argc3h.com", L"\x0455\x0441\x043e\x0440\x0435.com", false}, |
| 216 // ѕсоре123.com with ѕсоре in Cyrillic. |
| 217 {"xn--123-qdd8bmf3n.com", |
| 218 L"\x0455\x0441\x043e\x0440\x0435" L"123.com", false}, |
| 219 // ѕсоре-рау.com with ѕсоре and рау in Cyrillic. |
| 220 {"xn----8sbn9akccw8m.com", |
| 221 L"\x0455\x0441\x043e\x0440\x0435-\x0440\x0430\x0443.com", false}, |
| 222 // ѕсоре·рау.com with scope and pay in Cyrillic and U+00B7 between them. |
| 223 {"xn--uba29ona9akccw8m.com", |
| 224 L"\x0455\x0441\x043e\x0440\x0435\u00b7\x0440\x0430\x0443.com", false}, |
| 225 |
| 226 // Three of the labels above, but under an IDN TLD (.рф). |
| 227 {"xn--e1argc3h.xn--p1ai", |
| 228 L"\x0455\x0441\x043e\x0440\x0435.\x0440\x0444", true}, |
| 229 {"xn--123-qdd8bmf3n.xn--p1ai", |
| 230 L"\x0455\x0441\x043e\x0440\x0435" L"123.\x0440\x0444", true}, |
| 231 {"xn--uba29ona9akccw8m.xn--p1ai", |
| 232 L"\x0455\x0441\x043e\x0440\x0435\u00b7\x0440\x0430\x0443.\x0440\x0444", |
| 233 true}, |
| 234 |
| 235 // ѕсоре-рау.한국 with ѕсоре and рау in Cyrillic. |
| 236 {"xn----8sbn9akccw8m.xn--3e0b707e", |
| 237 L"\x0455\x0441\x043e\x0440\x0435-\x0440\x0430\x0443.\xd55c\xad6d", true}, |
| 238 |
| 239 // музей (museum in Russian) has characters without a Latin-look-alike. |
| 240 {"xn--e1adhj9a.com", L"\x043c\x0443\x0437\x0435\x0439.com", true}, |
| 241 |
| 212 // Mixed digits: the first two will also fail mixed script test | 242 // Mixed digits: the first two will also fail mixed script test |
| 213 // Latin + ASCII digit + Deva digit | 243 // Latin + ASCII digit + Deva digit |
| 214 {"xn--asc1deva-j0q.co.in", L"asc1deva\x0967.co.in", false}, | 244 {"xn--asc1deva-j0q.co.in", L"asc1deva\x0967.co.in", false}, |
| 215 // Latin + Deva digit + Beng digit | 245 // Latin + Deva digit + Beng digit |
| 216 {"xn--devabeng-f0qu3f.co.in", L"deva\x0967" L"beng\x09e7.co.in", false}, | 246 {"xn--devabeng-f0qu3f.co.in", L"deva\x0967" L"beng\x09e7.co.in", false}, |
| 217 // ASCII digit + Beng digit | 247 // ASCII digit + Beng digit |
| 218 {"xn--79-v5f.co.in", L"7\x09ea" L"9.co.in", false}, | 248 {"xn--79-v5f.co.in", L"7\x09ea" L"9.co.in", false}, |
| 219 // Deva digit + Beng digit | 249 // Deva digit + Beng digit |
| 220 {"xn--e4b0x.co.in", L"\x0967\x09e7.co.in", false}, | 250 {"xn--e4b0x.co.in", L"\x0967\x09e7.co.in", false}, |
| 221 // U+4E00 (CJK Ideograph One) is not a digit | 251 // U+4E00 (CJK Ideograph One) is not a digit |
| (...skipping 699 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 921 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, kNpos, kNpos, kNpos, kNpos, | 951 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, kNpos, kNpos, kNpos, kNpos, |
| 922 0, 1, 2, 3, 4, 5, 6, 7 | 952 0, 1, 2, 3, 4, 5, 6, 7 |
| 923 }; | 953 }; |
| 924 CheckAdjustedOffsets("http://user@foo.com/", kFormatUrlOmitAll, | 954 CheckAdjustedOffsets("http://user@foo.com/", kFormatUrlOmitAll, |
| 925 net::UnescapeRule::NORMAL, omit_all_offsets); | 955 net::UnescapeRule::NORMAL, omit_all_offsets); |
| 926 } | 956 } |
| 927 | 957 |
| 928 } // namespace | 958 } // namespace |
| 929 | 959 |
| 930 } // namespace url_formatter | 960 } // namespace url_formatter |
| OLD | NEW |