Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(15)

Side by Side Diff: components/url_formatter/url_formatter_unittest.cc

Issue 2895103003: Drop Mongolian from the IDN script list and tighten up the policy on Armenian-Latin mixing (Closed)
Patch Set: block Armenian + Latin mix Created 3 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2015 The Chromium Authors. All rights reserved. 1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/url_formatter/url_formatter.h" 5 #include "components/url_formatter/url_formatter.h"
6 6
7 #include <stddef.h> 7 #include <stddef.h>
8 #include <string.h> 8 #include <string.h>
9 9
10 #include <vector> 10 #include <vector>
(...skipping 103 matching lines...) Expand 10 before | Expand all | Expand 10 after
114 L"\x0e2a\x0e32\x0e22\x0e01\x0e32\x0e23\x0e1a\x0e34\x0e19.th", true}, 114 L"\x0e2a\x0e32\x0e22\x0e01\x0e32\x0e23\x0e1a\x0e34\x0e19.th", true},
115 // Thai + Common 115 // Thai + Common
116 {"xn---123-9goxcp8c9db2r.th", 116 {"xn---123-9goxcp8c9db2r.th",
117 L"\x0e20\x0e32\x0e29\x0e32\x0e44\x0e17\x0e22-123.th", true}, 117 L"\x0e20\x0e32\x0e29\x0e32\x0e44\x0e17\x0e22-123.th", true},
118 // Devanagari (Hindi) 118 // Devanagari (Hindi)
119 {"www.xn--l1b6a9e1b7c.in", L"www.\x0905\x0915\x094b\x0932\x093e.in", true}, 119 {"www.xn--l1b6a9e1b7c.in", L"www.\x0905\x0915\x094b\x0932\x093e.in", true},
120 // Devanagari + Common 120 // Devanagari + Common
121 {"xn---123-kbjl2j0bl2k.in", 121 {"xn---123-kbjl2j0bl2k.in",
122 L"\x0939\x093f\x0928\x094d\x0926\x0940-123.in", true}, 122 L"\x0939\x093f\x0928\x094d\x0926\x0940-123.in", true},
123 123
124 // 5 Aspirational scripts 124 // 4 Aspirational scripts
125 // Unified Canadian Syllabary 125 // Unified Canadian Syllabary
126 {"xn--dfe0tte.ca", L"\x1456\x14c2\x14ef.ca", true}, 126 {"xn--dfe0tte.ca", L"\x1456\x14c2\x14ef.ca", true},
127 // Tifinagh 127 // Tifinagh
128 {"xn--4ljxa2bb4a6bxb.ma", 128 {"xn--4ljxa2bb4a6bxb.ma",
129 L"\x2d5c\x2d49\x2d3c\x2d49\x2d4f\x2d30\x2d56.ma", true}, 129 L"\x2d5c\x2d49\x2d3c\x2d49\x2d4f\x2d30\x2d56.ma", true},
130 // Tifinagh with a disallowed character(U+2D6F) 130 // Tifinagh with a disallowed character(U+2D6F)
131 {"xn--hmjzaby5d5f.ma", L"\x2d5c\x2d49\x2d3c\x2d6f\x2d49\x2d4f.ma", false}, 131 {"xn--hmjzaby5d5f.ma", L"\x2d5c\x2d49\x2d3c\x2d6f\x2d49\x2d4f.ma", false},
132 // Yi 132 // Yi
133 {"xn--4o7a6e1x64c.cn", L"\xa188\xa320\xa071\xa0b7.cn", true}, 133 {"xn--4o7a6e1x64c.cn", L"\xa188\xa320\xa071\xa0b7.cn", true},
134 // Mongolian - 'ordu' (place, camp)
135 {"xn--56ec8bp.cn", L"\x1823\x1837\x1833\x1824.cn", true},
136 // Mongolian with a disallowed character
137 {"xn--95e5de3ds.cn", L"\x1823\x1837\x1804\x1833\x1824.cn", false},
138 // Miao/Pollard 134 // Miao/Pollard
139 {"xn--2u0fpf0a.cn", L"\U00016f04\U00016f62\U00016f59.cn", true}, 135 {"xn--2u0fpf0a.cn", L"\U00016f04\U00016f62\U00016f59.cn", true},
140 136
137 // Mongolian is disallowed because it's written vertically.
138 // Mongolian - 'ordu' (place, camp)
139 {"xn--56ec8bp.cn", L"\x1823\x1837\x1833\x1824.cn", false},
140 // Mongolian with a disallowed character
141 {"xn--95e5de3ds.cn", L"\x1823\x1837\x1804\x1833\x1824.cn", false},
142
141 // Script mixing tests 143 // Script mixing tests
142 // The following script combinations are allowed. 144 // The following script combinations are allowed.
143 // MODERATELY_RESTRICTIVE with Latin limited to ASCII-Latin. 145 // MODERATELY_RESTRICTIVE with Latin limited to ASCII-Latin.
144 // ASCII-Latin + Japn (Kana + Han) 146 // ASCII-Latin + Japn (Kana + Han)
145 // ASCII-Latin + Kore (Hangul + Han) 147 // ASCII-Latin + Kore (Hangul + Han)
146 // ASCII-Latin + Han + Bopomofo 148 // ASCII-Latin + Han + Bopomofo
147 // ASCII-Latin + any allowed script other than Cyrillic, Greek, Cherokee 149 // ASCII-Latin + any allowed script other than Cyrillic, Greek, Cherokee
148 // and Unified Canadian Syllabary 150 // and Unified Canadian Syllabary
149 // "payp<alpha>l.com" 151 // "payp<alpha>l.com"
150 {"www.xn--paypl-g9d.com", L"payp\x03b1l.com", false}, 152 {"www.xn--paypl-g9d.com", L"payp\x03b1l.com", false},
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after
202 // Canadian Syllabary + Latin 204 // Canadian Syllabary + Latin
203 {"xn--ab-lym.com", L"ab\x14BF.com", false}, 205 {"xn--ab-lym.com", L"ab\x14BF.com", false},
204 {"xn--ab1-p6q.com", L"ab1\x14BF.com", false}, 206 {"xn--ab1-p6q.com", L"ab1\x14BF.com", false},
205 {"xn--1ab-m6qd.com", L"\x14BF" L"1ab.com", false}, 207 {"xn--1ab-m6qd.com", L"\x14BF" L"1ab.com", false},
206 {"xn--ab-jymc.com", L"\x14BF" L"ab.com", false}, 208 {"xn--ab-jymc.com", L"\x14BF" L"ab.com", false},
207 // Tifinagh + Latin 209 // Tifinagh + Latin
208 {"xn--liy-go4a.com", L"li\u24dfy.com", false}, 210 {"xn--liy-go4a.com", L"li\u24dfy.com", false},
209 {"xn--rol-ho4a.com", L"rol\u24df.com", false}, 211 {"xn--rol-ho4a.com", L"rol\u24df.com", false},
210 {"xn--ily-eo4a.com", L"\u24dfily.com", false}, 212 {"xn--ily-eo4a.com", L"\u24dfily.com", false},
211 {"xn--1ly-eo4a.com", L"\u24df1ly.com", false}, 213 {"xn--1ly-eo4a.com", L"\u24df1ly.com", false},
214 // Miao + Latin
215 {"xn--liy-rc12a.com", L"li\U00016FD8y.com", false},
216 {"xn--rol-sc12a.com", L"roll\U00016FD8.com", false},
217 {"xn--ily-pc12a.com", L"\U00016FD8ily.com", false},
218 {"xn--1ly-pc12a.com", L"\U00016FD81ly.com", false},
212 219
213 // Invisibility check 220 // Invisibility check
214 // Thai tone mark mai ek(U+0E48) repeated 221 // Thai tone mark mai ek(U+0E48) repeated
215 {"xn--03c0b3ca.th", L"\x0e23\x0e35\x0e48\x0e48.th", false}, 222 {"xn--03c0b3ca.th", L"\x0e23\x0e35\x0e48\x0e48.th", false},
216 // Acute accent repeated 223 // Acute accent repeated
217 {"xn--a-xbba.com", L"a\x0301\x0301.com", false}, 224 {"xn--a-xbba.com", L"a\x0301\x0301.com", false},
218 // 'a' with acute accent + another acute accent 225 // 'a' with acute accent + another acute accent
219 {"xn--1ca20i.com", L"\x00e1\x0301.com", false}, 226 {"xn--1ca20i.com", L"\x00e1\x0301.com", false},
220 // Combining mark at the beginning 227 // Combining mark at the beginning
221 {"xn--abc-fdc.jp", L"\x0300" L"abc.jp", false}, 228 {"xn--abc-fdc.jp", L"\x0300" L"abc.jp", false},
222 229
223 // Mixed script confusable 230 // Mixed script confusable
224 // google with Armenian Small Letter Oh(U+0585) 231 // Armenian օ, ո, հ, and զ mixed with Latin
225 {"xn--gogle-lkg.com", L"g\x0585ogle.com", false}, 232 {"xn--gogle-lkg.com", L"g\x0585ogle.com", false},
226 {"xn--range-kkg.com", L"\x0585range.com", false}, 233 {"xn--range-kkg.com", L"\x0585range.com", false},
227 {"xn--cucko-pkg.com", L"cucko\x0585.com", false}, 234 {"xn--cucko-pkg.com", L"cucko\x0585.com", false},
228 // Latin 'o' in Armenian. 235 {"xn--an-bed.com", L"\x0578" L"an.com", false},
236 {"xn--hig-tee.com", L"hig\x0570.com", false},
237 {"xn---ray-fef.com", L"\x0566-ray.com", false},
238 // Latin 'o', 'h' and 'n' in Armenian
229 {"xn--o-ybcg0cu0cq.com", 239 {"xn--o-ybcg0cu0cq.com",
230 L"o\x0585\x0580\x0574\x0578\x0582\x0566\x0568.com", false}, 240 L"o\x0585\x0580\x0574\x0578\x0582\x0566\x0568.com", false},
241 {"xn--h-qccm4a.com", L"\x0580\x0574\x0578h.com", false},
242 {"xn--n-rccm3a.com", L"\x0580n\x0574\x0578.com", false},
243 {"xn--n1-0ddq0b.com", L"\x0580n1\x0574\x0578.com", false},
231 // Hiragana HE(U+3078) mixed with Katakana 244 // Hiragana HE(U+3078) mixed with Katakana
232 {"xn--49jxi3as0d0fpc.com", 245 {"xn--49jxi3as0d0fpc.com",
233 L"\x30e2\x30d2\x30fc\x30c8\x3078\x30d6\x30f3.com", false}, 246 L"\x30e2\x30d2\x30fc\x30c8\x3078\x30d6\x30f3.com", false},
234 247
235 // U+30FC should be preceded by a Hiragana/Katakana. 248 // U+30FC should be preceded by a Hiragana/Katakana.
236 // Katakana + U+30FC + Han 249 // Katakana + U+30FC + Han
237 {"xn--lck0ip02qw5ya.jp", L"\x30ab\x30fc\x91ce\x7403.jp", true}, 250 {"xn--lck0ip02qw5ya.jp", L"\x30ab\x30fc\x91ce\x7403.jp", true},
238 // Hiragana + U+30FC + Han 251 // Hiragana + U+30FC + Han
239 {"xn--u8j5tr47nw5ya.jp", L"\x304b\x30fc\x91ce\x7403.jp", true}, 252 {"xn--u8j5tr47nw5ya.jp", L"\x304b\x30fc\x91ce\x7403.jp", true},
240 // U+30FC + Han 253 // U+30FC + Han
(...skipping 808 matching lines...) Expand 10 before | Expand all | Expand 10 after
1049 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, kNpos, kNpos, kNpos, kNpos, 1062 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, kNpos, kNpos, kNpos, kNpos,
1050 0, 1, 2, 3, 4, 5, 6, 7 1063 0, 1, 2, 3, 4, 5, 6, 7
1051 }; 1064 };
1052 CheckAdjustedOffsets("http://user@foo.com/", kFormatUrlOmitAll, 1065 CheckAdjustedOffsets("http://user@foo.com/", kFormatUrlOmitAll,
1053 net::UnescapeRule::NORMAL, omit_all_offsets); 1066 net::UnescapeRule::NORMAL, omit_all_offsets);
1054 } 1067 }
1055 1068
1056 } // namespace 1069 } // namespace
1057 1070
1058 } // namespace url_formatter 1071 } // namespace url_formatter
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698