OLD | NEW |
1 // Copyright 2015 The Chromium Authors. All rights reserved. | 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "components/url_formatter/url_formatter.h" | 5 #include "components/url_formatter/url_formatter.h" |
6 | 6 |
7 #include <stddef.h> | 7 #include <stddef.h> |
8 #include <string.h> | 8 #include <string.h> |
9 | 9 |
10 #include <vector> | 10 #include <vector> |
(...skipping 169 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
180 // Devanagari + Bengali | 180 // Devanagari + Bengali |
181 {"xn--11b0x.in", L"\x0915\x0995.in", false}, | 181 {"xn--11b0x.in", L"\x0915\x0995.in", false}, |
182 | 182 |
183 // Invisibility check | 183 // Invisibility check |
184 // Thai tone mark mai ek (U+0E48) repeated | 184 // Thai tone mark mai ek (U+0E48) repeated |
185 {"xn--03c0b3ca.th", L"\x0e23\x0e35\x0e48\x0e48.th", false}, | 185 {"xn--03c0b3ca.th", L"\x0e23\x0e35\x0e48\x0e48.th", false}, |
186 // Acute accent repeated | 186 // Acute accent repeated |
187 {"xn--a-xbba.com", L"a\x0301\x0301.com", false}, | 187 {"xn--a-xbba.com", L"a\x0301\x0301.com", false}, |
188 // 'a' with acute accent + another acute accent | 188 // 'a' with acute accent + another acute accent |
189 {"xn--1ca20i.com", L"\x00e1\x0301.com", false}, | 189 {"xn--1ca20i.com", L"\x00e1\x0301.com", false}, |
| 190 // Combining mark at the beginning |
| 191 {"xn--abc-fdc.jp", L"\x0300" L"abc.jp", false}, |
190 | 192 |
191 // Mixed script confusable | 193 // Mixed script confusable |
192 // google with Armenian Small Letter Oh(U+0585) | 194 // google with Armenian Small Letter Oh(U+0585) |
193 {"xn--gogle-lkg.com", L"g\x0585ogle.com", false}, | 195 {"xn--gogle-lkg.com", L"g\x0585ogle.com", false}, |
194 {"xn--range-kkg.com", L"\x0585range.com", false}, | 196 {"xn--range-kkg.com", L"\x0585range.com", false}, |
195 {"xn--cucko-pkg.com", L"cucko\x0585.com", false}, | 197 {"xn--cucko-pkg.com", L"cucko\x0585.com", false}, |
196 // Latin 'o' in Armenian. | 198 // Latin 'o' in Armenian. |
197 {"xn--o-ybcg0cu0cq.com", | 199 {"xn--o-ybcg0cu0cq.com", |
198 L"o\x0585\x0580\x0574\x0578\x0582\x0566\x0568.com", false}, | 200 L"o\x0585\x0580\x0574\x0578\x0582\x0566\x0568.com", false}, |
199 // Hiragana HE(U+3078) mixed with Katakana | 201 // Hiragana HE(U+3078) mixed with Katakana |
200 {"xn--49jxi3as0d0fpc.com", | 202 {"xn--49jxi3as0d0fpc.com", |
201 L"\x30e2\x30d2\x30fc\x30c8\x3078\x30d6\x30f3.com", false}, | 203 L"\x30e2\x30d2\x30fc\x30c8\x3078\x30d6\x30f3.com", false}, |
| 204 |
| 205 // U+30FC should be preceded by a Hiragana/Katakana. |
| 206 // Katakana + U+30FC + Han |
| 207 {"xn--lck0ip02qw5ya.jp", L"\x30ab\x30fc\x91ce\x7403.jp", true}, |
| 208 // Hiragana + U+30FC + Han |
| 209 {"xn--u8j5tr47nw5ya.jp", L"\x304b\x30fc\x91ce\x7403.jp", true}, |
202 // U+30FC + Han | 210 // U+30FC + Han |
203 {"xn--weka801xo02a.com", L"\x30fc\x52d5\x753b\x30fc.com", false}, | 211 {"xn--weka801xo02a.com", L"\x30fc\x52d5\x753b\x30fc.com", false}, |
204 // Han + U+30FC + Han | 212 // Han + U+30FC + Han |
205 {"xn--wekz60nb2ay85atj0b.jp", L"\x65e5\x672c\x30fc\x91ce\x7403.jp", false}, | 213 {"xn--wekz60nb2ay85atj0b.jp", L"\x65e5\x672c\x30fc\x91ce\x7403.jp", false}, |
| 214 // U+30FC at the beginning |
| 215 {"xn--wek060nb2a.jp", L"\x30fc\x65e5\x672c", false}, |
206 // Latin + U+30FC + Latin | 216 // Latin + U+30FC + Latin |
207 {"xn--abcdef-r64e.jp", L"abc\x30fc" L"def.jp", false}, | 217 {"xn--abcdef-r64e.jp", L"abc\x30fc" L"def.jp", false}, |
| 218 |
| 219 // U+30FB (・) is not allowed next to Latin, but allowed otherwise. |
| 220 // U+30FB + Han |
| 221 {"xn--vekt920a.jp", L"\x30fb\x91ce.jp", true}, |
| 222 // Han + U+30FB + Han |
| 223 {"xn--vek160nb2ay85atj0b.jp", L"\x65e5\x672c\x30fb\x91ce\x7403.jp", true}, |
208 // Latin + U+30FB + Latin | 224 // Latin + U+30FB + Latin |
209 {"xn--abcdef-k64e.jp", L"abc\x30fb" L"def.jp", false}, | 225 {"xn--abcdef-k64e.jp", L"abc\x30fb" L"def.jp", false}, |
210 // U+30FB + Latin | 226 // U+30FB + Latin |
211 {"xn--abc-os4b.jp", L"\x30fb" L"abc.jp", false}, | 227 {"xn--abc-os4b.jp", L"\x30fb" L"abc.jp", false}, |
212 | 228 |
| 229 // U+30FD (ヽ) is allowed only after Katakana. |
| 230 // Katakana + U+30FD |
| 231 {"xn--lck2i.jp", L"\x30ab\x30fd.jp", true}, |
| 232 // Hiragana + U+30FD |
| 233 {"xn--u8j7t.jp", L"\x304b\x30fd.jp", false}, |
| 234 // Han + U+30FD |
| 235 {"xn--xek368f.jp", L"\x4e00\x30fd.jp", false}, |
| 236 {"xn--aa-mju.jp", L"a\x30fd.jp", false}, |
| 237 {"xn--a1-bo4a.jp", L"a1\x30fd.jp", false}, |
| 238 |
| 239 // U+30FE (ヾ) is allowed only after Katakana. |
| 240 // Katakana + U+30FE |
| 241 {"xn--lck4i.jp", L"\x30ab\x30fe.jp", true}, |
| 242 // Hiragana + U+30FE |
| 243 {"xn--u8j9t.jp", L"\x304b\x30fe.jp", false}, |
| 244 // Han + U+30FE |
| 245 {"xn--yek168f.jp", L"\x4e00\x30fe.jp", false}, |
| 246 {"xn--a-oju.jp", L"a\x30fe.jp", false}, |
| 247 {"xn--a1-eo4a.jp", L"a1\x30fe.jp", false}, |
| 248 |
213 // Cyrillic labels made of Latin-look-alike Cyrillic letters. | 249 // Cyrillic labels made of Latin-look-alike Cyrillic letters. |
214 // ѕсоре.com with ѕсоре in Cyrillic | 250 // ѕсоре.com with ѕсоре in Cyrillic |
215 {"xn--e1argc3h.com", L"\x0455\x0441\x043e\x0440\x0435.com", false}, | 251 {"xn--e1argc3h.com", L"\x0455\x0441\x043e\x0440\x0435.com", false}, |
216 // ѕсоре123.com with ѕсоре in Cyrillic. | 252 // ѕсоре123.com with ѕсоре in Cyrillic. |
217 {"xn--123-qdd8bmf3n.com", | 253 {"xn--123-qdd8bmf3n.com", |
218 L"\x0455\x0441\x043e\x0440\x0435" L"123.com", false}, | 254 L"\x0455\x0441\x043e\x0440\x0435" L"123.com", false}, |
219 // ѕсоре-рау.com with ѕсоре and рау in Cyrillic. | 255 // ѕсоре-рау.com with ѕсоре and рау in Cyrillic. |
220 {"xn----8sbn9akccw8m.com", | 256 {"xn----8sbn9akccw8m.com", |
221 L"\x0455\x0441\x043e\x0440\x0435-\x0440\x0430\x0443.com", false}, | 257 L"\x0455\x0441\x043e\x0440\x0435-\x0440\x0430\x0443.com", false}, |
222 // ѕсоре·рау.com with scope and pay in Cyrillic and U+00B7 between them. | 258 // ѕсоре·рау.com with scope and pay in Cyrillic and U+00B7 between them. |
(...skipping 728 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
951 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, kNpos, kNpos, kNpos, kNpos, | 987 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, kNpos, kNpos, kNpos, kNpos, |
952 0, 1, 2, 3, 4, 5, 6, 7 | 988 0, 1, 2, 3, 4, 5, 6, 7 |
953 }; | 989 }; |
954 CheckAdjustedOffsets("http://user@foo.com/", kFormatUrlOmitAll, | 990 CheckAdjustedOffsets("http://user@foo.com/", kFormatUrlOmitAll, |
955 net::UnescapeRule::NORMAL, omit_all_offsets); | 991 net::UnescapeRule::NORMAL, omit_all_offsets); |
956 } | 992 } |
957 | 993 |
958 } // namespace | 994 } // namespace |
959 | 995 |
960 } // namespace url_formatter | 996 } // namespace url_formatter |
OLD | NEW |