OLD | NEW |
1 // Copyright 2015 The Chromium Authors. All rights reserved. | 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "components/url_formatter/url_formatter.h" | 5 #include "components/url_formatter/url_formatter.h" |
6 | 6 |
7 #include <stddef.h> | 7 #include <stddef.h> |
8 #include <string.h> | 8 #include <string.h> |
9 | 9 |
10 #include <vector> | 10 #include <vector> |
(...skipping 14 matching lines...) Expand all Loading... |
25 using base::ASCIIToUTF16; | 25 using base::ASCIIToUTF16; |
26 | 26 |
27 const size_t kNpos = base::string16::npos; | 27 const size_t kNpos = base::string16::npos; |
28 | 28 |
// One row of the idn_cases table below.
//   input           - ASCII host name; may contain punycode ("xn--") labels.
//   unicode_output  - the same host as a wide string, with those labels
//                     written out in Unicode.
//   unicode_allowed - NOTE(review): presumably whether the Unicode form is
//                     expected to be displayed rather than the punycode
//                     input; confirm against the test that reads idn_cases.
29 struct IDNTestCase { | 29 struct IDNTestCase { |
30 const char* const input; | 30 const char* const input; |
31 const wchar_t* unicode_output; | 31 const wchar_t* unicode_output; |
32 const bool unicode_allowed; | 32 const bool unicode_allowed; |
33 }; | 33 }; |
34 | 34 |
| 35 // TODO(jshin): Replace L"..." with "..." in UTF-8 when it's easier to read. |
35 const IDNTestCase idn_cases[] = { | 36 const IDNTestCase idn_cases[] = { |
36 // No IDN | 37 // No IDN |
37 {"www.google.com", L"www.google.com", true}, | 38 {"www.google.com", L"www.google.com", true}, |
38 {"www.google.com.", L"www.google.com.", true}, | 39 {"www.google.com.", L"www.google.com.", true}, |
39 {".", L".", true}, | 40 {".", L".", true}, |
40 {"", L"", true}, | 41 {"", L"", true}, |
41 // IDN | 42 // IDN |
42 // Hanzi (Traditional Chinese) | 43 // Hanzi (Traditional Chinese) |
43 {"xn--1lq90ic7f1rc.cn", L"\x5317\x4eac\x5927\x5b78.cn", true}, | 44 {"xn--1lq90ic7f1rc.cn", L"\x5317\x4eac\x5927\x5b78.cn", true}, |
44 // Hanzi ('video' in Simplified Chinese) | 45 // Hanzi ('video' in Simplified Chinese) |
(...skipping 157 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
202 {"xn--weka801xo02a.com", L"\x30fc\x52d5\x753b\x30fc.com", false}, | 203 {"xn--weka801xo02a.com", L"\x30fc\x52d5\x753b\x30fc.com", false}, |
203 // Han + U+30FC + Han | 204 // Han + U+30FC + Han |
204 {"xn--wekz60nb2ay85atj0b.jp", L"\x65e5\x672c\x30fc\x91ce\x7403.jp", false}, | 205 {"xn--wekz60nb2ay85atj0b.jp", L"\x65e5\x672c\x30fc\x91ce\x7403.jp", false}, |
205 // Latin + U+30FC + Latin | 206 // Latin + U+30FC + Latin |
206 {"xn--abcdef-r64e.jp", L"abc\x30fc" L"def.jp", false}, | 207 {"xn--abcdef-r64e.jp", L"abc\x30fc" L"def.jp", false}, |
207 // Latin + U+30FB + Latin | 208 // Latin + U+30FB + Latin |
208 {"xn--abcdef-k64e.jp", L"abc\x30fb" L"def.jp", false}, | 209 {"xn--abcdef-k64e.jp", L"abc\x30fb" L"def.jp", false}, |
209 // U+30FB + Latin | 210 // U+30FB + Latin |
210 {"xn--abc-os4b.jp", L"\x30fb" L"abc.jp", false}, | 211 {"xn--abc-os4b.jp", L"\x30fb" L"abc.jp", false}, |
211 | 212 |
| 213 // Cyrillic labels made of Latin-look-alike Cyrillic letters. |
| 214 // ѕсоре.com with ѕсоре in Cyrillic |
| 215 {"xn--e1argc3h.com", L"\x0455\x0441\x043e\x0440\x0435.com", false}, |
| 216 // ѕсоре123.com with ѕсоре in Cyrillic. |
| 217 {"xn--123-qdd8bmf3n.com", |
| 218 L"\x0455\x0441\x043e\x0440\x0435" L"123.com", false}, |
| 219 // ѕсоре-рау.com with ѕсоре and рау in Cyrillic. |
| 220 {"xn----8sbn9akccw8m.com", |
| 221 L"\x0455\x0441\x043e\x0440\x0435-\x0440\x0430\x0443.com", false}, |
| 222 // ѕсоре·рау.com with scope and pay in Cyrillic and U+00B7 between them. |
| 223 {"xn--uba29ona9akccw8m.com", |
| 224 L"\x0455\x0441\x043e\x0440\x0435\u00b7\x0440\x0430\x0443.com", false}, |
| 225 |
| 226 // Three of the four labels above, but under the IDN TLD .рф. |
| 227 {"xn--e1argc3h.xn--p1ai", |
| 228 L"\x0455\x0441\x043e\x0440\x0435.\x0440\x0444", true}, |
| 229 {"xn--123-qdd8bmf3n.xn--p1ai", |
| 230 L"\x0455\x0441\x043e\x0440\x0435" L"123.\x0440\x0444", true}, |
| 231 {"xn--uba29ona9akccw8m.xn--p1ai", |
| 232 L"\x0455\x0441\x043e\x0440\x0435\u00b7\x0440\x0430\x0443.\x0440\x0444", |
| 233 true}, |
| 234 |
| 235 // ѕсоре-рау.한국 with ѕсоре and рау in Cyrillic. |
| 236 {"xn----8sbn9akccw8m.xn--3e0b707e", |
| 237 L"\x0455\x0441\x043e\x0440\x0435-\x0440\x0430\x0443.\xd55c\xad6d", true}, |
| 238 |
| 239 // музей (museum in Russian) has characters without a Latin-look-alike. |
| 240 {"xn--e1adhj9a.com", L"\x043c\x0443\x0437\x0435\x0439.com", true}, |
| 241 |
212 // Mixed digits: the first two will also fail mixed script test | 242 // Mixed digits: the first two will also fail mixed script test |
213 // Latin + ASCII digit + Deva digit | 243 // Latin + ASCII digit + Deva digit |
214 {"xn--asc1deva-j0q.co.in", L"asc1deva\x0967.co.in", false}, | 244 {"xn--asc1deva-j0q.co.in", L"asc1deva\x0967.co.in", false}, |
215 // Latin + Deva digit + Beng digit | 245 // Latin + Deva digit + Beng digit |
216 {"xn--devabeng-f0qu3f.co.in", L"deva\x0967" L"beng\x09e7.co.in", false}, | 246 {"xn--devabeng-f0qu3f.co.in", L"deva\x0967" L"beng\x09e7.co.in", false}, |
217 // ASCII digit + Beng digit | 247 // ASCII digit + Beng digit |
218 {"xn--79-v5f.co.in", L"7\x09ea" L"9.co.in", false}, | 248 {"xn--79-v5f.co.in", L"7\x09ea" L"9.co.in", false}, |
219 // Deva digit + Beng digit | 249 // Deva digit + Beng digit |
220 {"xn--e4b0x.co.in", L"\x0967\x09e7.co.in", false}, | 250 {"xn--e4b0x.co.in", L"\x0967\x09e7.co.in", false}, |
221 // U+4E00 (CJK Ideograph One) is not a digit | 251 // U+4E00 (CJK Ideograph One) is not a digit |
(...skipping 699 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
921 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, kNpos, kNpos, kNpos, kNpos, | 951 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, kNpos, kNpos, kNpos, kNpos, |
922 0, 1, 2, 3, 4, 5, 6, 7 | 952 0, 1, 2, 3, 4, 5, 6, 7 |
923 }; | 953 }; |
924 CheckAdjustedOffsets("http://user@foo.com/", kFormatUrlOmitAll, | 954 CheckAdjustedOffsets("http://user@foo.com/", kFormatUrlOmitAll, |
925 net::UnescapeRule::NORMAL, omit_all_offsets); | 955 net::UnescapeRule::NORMAL, omit_all_offsets); |
926 } | 956 } |
927 | 957 |
928 } // namespace | 958 } // namespace |
929 | 959 |
930 } // namespace url_formatter | 960 } // namespace url_formatter |
OLD | NEW |