OLD | NEW |
---|---|
1 // Copyright 2017 The Chromium Authors. All rights reserved. | 1 // Copyright 2017 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "components/url_formatter/idn_spoof_checker.h" | 5 #include "components/url_formatter/idn_spoof_checker.h" |
6 | 6 |
7 #include "base/numerics/safe_conversions.h" | 7 #include "base/numerics/safe_conversions.h" |
8 #include "base/strings/string_split.h" | 8 #include "base/strings/string_split.h" |
9 #include "base/strings/string_util.h" | 9 #include "base/strings/string_util.h" |
10 #include "base/threading/thread_local_storage.h" | 10 #include "base/threading/thread_local_storage.h" |
(...skipping 149 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
160 // Prolonged Sound) used out-of-context. | 160 // Prolonged Sound) used out-of-context. |
161 // - Disallow U+30FD/E (Katakana iteration mark/voiced iteration mark) | 161 // - Disallow U+30FD/E (Katakana iteration mark/voiced iteration mark) |
162 // unless they're preceded by a Katakana. | 162 // unless they're preceded by a Katakana. |
163 // - Disallow three Hiragana letters (U+307[8-A]) or Katakana letters | 163 // - Disallow three Hiragana letters (U+307[8-A]) or Katakana letters |
164 // (U+30D[8-A]) that look exactly like each other when they're used in a | 164 // (U+30D[8-A]) that look exactly like each other when they're used in a |
165 // label otherwise entirely in Katakana or Hiragana. | 165 // label otherwise entirely in Katakana or Hiragana. |
166 // - Disallow U+0585 (Armenian Small Letter Oh) and U+0581 (Armenian Small | 166 // - Disallow U+0585 (Armenian Small Letter Oh) and U+0581 (Armenian Small |
167 // Letter Co) to be next to Latin. | 167 // Letter Co) to be next to Latin. |
168 // - Disallow Latin 'o' and 'g' next to Armenian. | 168 // - Disallow Latin 'o' and 'g' next to Armenian. |
169 // - Disallow mixing of Latin and Canadian Syllabary. | 169 // - Disallow mixing of Latin and Canadian Syllabary. |
170 // - Disallow mixing of Latin and Tifinagh. | |
Peter Kasting
2017/05/22 18:47:46
Are there other "Latin + <script>" combos we shoul
| |
170 dangerous_pattern = new icu::RegexMatcher( | 171 dangerous_pattern = new icu::RegexMatcher( |
171 icu::UnicodeString( | 172 icu::UnicodeString( |
172 R"([^\p{scx=kana}\p{scx=hira}\p{scx=hani}])" | 173 R"([^\p{scx=kana}\p{scx=hira}\p{scx=hani}])" |
173 R"([\u30ce\u30f3\u30bd\u30be])" | 174 R"([\u30ce\u30f3\u30bd\u30be])" |
174 R"([^\p{scx=kana}\p{scx=hira}\p{scx=hani}]|)" | 175 R"([^\p{scx=kana}\p{scx=hira}\p{scx=hani}]|)" |
175 R"([^\p{scx=kana}\p{scx=hira}]\u30fc|^\u30fc|)" | 176 R"([^\p{scx=kana}\p{scx=hira}]\u30fc|^\u30fc|)" |
176 R"([^\p{scx=kana}][\u30fd\u30fe]|^[\u30fd\u30fe]|)" | 177 R"([^\p{scx=kana}][\u30fd\u30fe]|^[\u30fd\u30fe]|)" |
177 R"(^[\p{scx=kana}]+[\u3078-\u307a][\p{scx=kana}]+$|)" | 178 R"(^[\p{scx=kana}]+[\u3078-\u307a][\p{scx=kana}]+$|)" |
178 R"(^[\p{scx=hira}]+[\u30d8-\u30da][\p{scx=hira}]+$|)" | 179 R"(^[\p{scx=hira}]+[\u30d8-\u30da][\p{scx=hira}]+$|)" |
179 R"([a-z]\u30fb|\u30fb[a-z]|)" | 180 R"([a-z]\u30fb|\u30fb[a-z]|)" |
180 R"(^[\u0585\u0581]+[a-z]|[a-z][\u0585\u0581]+$|)" | 181 R"(^[\u0585\u0581]+[a-z]|[a-z][\u0585\u0581]+$|)" |
181 R"([a-z][\u0585\u0581]+[a-z]|)" | 182 R"([a-z][\u0585\u0581]+[a-z]|)" |
182 R"(^[og]+[\p{scx=armn}]|[\p{scx=armn}][og]+$|)" | 183 R"(^[og]+[\p{scx=armn}]|[\p{scx=armn}][og]+$|)" |
183 R"([\p{scx=armn}][og]+[\p{scx=armn}]|)" | 184 R"([\p{scx=armn}][og]+[\p{scx=armn}]|)" |
184 R"([\p{sc=cans}].*[a-z]|[a-z].*[\p{sc=cans}])", | 185 R"([\p{sc=cans}].*[a-z]|[a-z].*[\p{sc=cans}]|)" |
186 R"([\p{sc=tfng}].*[a-z]|[a-z].*[\p{sc=tfng}])", | |
Peter Kasting
2017/05/22 18:47:46
Nit: Feels like there's maybe a less redundant way
| |
185 -1, US_INV), | 187 -1, US_INV), |
186 0, status); | 188 0, status); |
187 tls_index.Set(dangerous_pattern); | 189 tls_index.Set(dangerous_pattern); |
188 } | 190 } |
189 dangerous_pattern->reset(label_string); | 191 dangerous_pattern->reset(label_string); |
190 return !dangerous_pattern->find(); | 192 return !dangerous_pattern->find(); |
191 } | 193 } |
192 | 194 |
193 bool IDNSpoofChecker::IsMadeOfLatinAlikeCyrillic( | 195 bool IDNSpoofChecker::IsMadeOfLatinAlikeCyrillic( |
194 const icu::UnicodeString& label) { | 196 const icu::UnicodeString& label) { |
(...skipping 92 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
287 allowed_set.remove(0x0F8Cu); | 289 allowed_set.remove(0x0F8Cu); |
288 allowed_set.remove(0x0F8Du); | 290 allowed_set.remove(0x0F8Du); |
289 allowed_set.remove(0x0F8Eu); | 291 allowed_set.remove(0x0F8Eu); |
290 allowed_set.remove(0x0F8Fu); | 292 allowed_set.remove(0x0F8Fu); |
291 #endif | 293 #endif |
292 | 294 |
293 uspoof_setAllowedUnicodeSet(checker_, &allowed_set, status); | 295 uspoof_setAllowedUnicodeSet(checker_, &allowed_set, status); |
294 } | 296 } |
295 | 297 |
296 } // namespace url_formatter | 298 } // namespace url_formatter |
OLD | NEW |