OLD | NEW |
1 // Copyright 2017 The Chromium Authors. All rights reserved. | 1 // Copyright 2017 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "components/url_formatter/idn_spoof_checker.h" | 5 #include "components/url_formatter/idn_spoof_checker.h" |
6 | 6 |
7 #include "base/numerics/safe_conversions.h" | 7 #include "base/numerics/safe_conversions.h" |
8 #include "base/strings/string_split.h" | 8 #include "base/strings/string_split.h" |
9 #include "base/strings/string_util.h" | 9 #include "base/strings/string_util.h" |
10 #include "base/threading/thread_local_storage.h" | 10 #include "base/threading/thread_local_storage.h" |
(...skipping 218 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
229 // Prolonged Sound) used out-of-context. | 229 // Prolonged Sound) used out-of-context. |
230 // - Disallow U+30FD/E (Katakana iteration mark/voiced iteration mark) | 230 // - Disallow U+30FD/E (Katakana iteration mark/voiced iteration mark) |
231 // unless they're preceded by a Katakana. | 231 // unless they're preceded by a Katakana. |
232 // - Disallow three Hiragana letters (U+307[8-A]) or Katakana letters | 232 // - Disallow three Hiragana letters (U+307[8-A]) or Katakana letters |
233 // (U+30D[8-A]) that look exactly like each other when they're used in a | 233 // (U+30D[8-A]) that look exactly like each other when they're used in a |
234 // label otherwise entirely in Katakana or Hiragana. | 234 // label otherwise entirely in Katakana or Hiragana. |
235 // - Disallow U+0585 (Armenian Small Letter Oh) and U+0581 (Armenian Small | 235 // - Disallow U+0585 (Armenian Small Letter Oh) and U+0581 (Armenian Small |
236 // Letter Co) to be next to Latin. | 236 // Letter Co) to be next to Latin. |
237 // - Disallow Latin 'o' and 'g' next to Armenian. | 237 // - Disallow Latin 'o' and 'g' next to Armenian. |
238 // - Disallow mixing of Latin and Canadian Syllabary. | 238 // - Disallow mixing of Latin and Canadian Syllabary. |
| 239 // - Disallow mixing of Latin and Tifinagh. |
239 // - Disallow combining diacritical mark (U+0300-U+0339) after a non-LGC | 240 // - Disallow combining diacritical mark (U+0300-U+0339) after a non-LGC |
240 // character. Other combining diacritical marks are not in the allowed | 241 // character. Other combining diacritical marks are not in the allowed |
241 // character set. | 242 // character set. |
242 dangerous_pattern = new icu::RegexMatcher( | 243 dangerous_pattern = new icu::RegexMatcher( |
243 icu::UnicodeString( | 244 icu::UnicodeString( |
244 R"([^\p{scx=kana}\p{scx=hira}\p{scx=hani}])" | 245 R"([^\p{scx=kana}\p{scx=hira}\p{scx=hani}])" |
245 R"([\u30ce\u30f3\u30bd\u30be])" | 246 R"([\u30ce\u30f3\u30bd\u30be])" |
246 R"([^\p{scx=kana}\p{scx=hira}\p{scx=hani}]|)" | 247 R"([^\p{scx=kana}\p{scx=hira}\p{scx=hani}]|)" |
247 R"([^\p{scx=kana}\p{scx=hira}]\u30fc|^\u30fc|)" | 248 R"([^\p{scx=kana}\p{scx=hira}]\u30fc|^\u30fc|)" |
248 R"([^\p{scx=kana}][\u30fd\u30fe]|^[\u30fd\u30fe]|)" | 249 R"([^\p{scx=kana}][\u30fd\u30fe]|^[\u30fd\u30fe]|)" |
249 R"(^[\p{scx=kana}]+[\u3078-\u307a][\p{scx=kana}]+$|)" | 250 R"(^[\p{scx=kana}]+[\u3078-\u307a][\p{scx=kana}]+$|)" |
250 R"(^[\p{scx=hira}]+[\u30d8-\u30da][\p{scx=hira}]+$|)" | 251 R"(^[\p{scx=hira}]+[\u30d8-\u30da][\p{scx=hira}]+$|)" |
251 R"([a-z]\u30fb|\u30fb[a-z]|)" | 252 R"([a-z]\u30fb|\u30fb[a-z]|)" |
252 R"(^[\u0585\u0581]+[a-z]|[a-z][\u0585\u0581]+$|)" | 253 R"(^[\u0585\u0581]+[a-z]|[a-z][\u0585\u0581]+$|)" |
253 R"([a-z][\u0585\u0581]+[a-z]|)" | 254 R"([a-z][\u0585\u0581]+[a-z]|)" |
254 R"(^[og]+[\p{scx=armn}]|[\p{scx=armn}][og]+$|)" | 255 R"(^[og]+[\p{scx=armn}]|[\p{scx=armn}][og]+$|)" |
255 R"([\p{scx=armn}][og]+[\p{scx=armn}]|)" | 256 R"([\p{scx=armn}][og]+[\p{scx=armn}]|)" |
256 R"([\p{sc=cans}].*[a-z]|[a-z].*[\p{sc=cans}]|)" | 257 R"([\p{sc=cans}].*[a-z]|[a-z].*[\p{sc=cans}]|)" |
| 258 R"([\p{sc=tfng}].*[a-z]|[a-z].*[\p{sc=tfng}]|)" |
257 R"([^\p{scx=latn}\p{scx=grek}\p{scx=cyrl}][\u0300-\u0339])", | 259 R"([^\p{scx=latn}\p{scx=grek}\p{scx=cyrl}][\u0300-\u0339])", |
258 -1, US_INV), | 260 -1, US_INV), |
259 0, status); | 261 0, status); |
260 tls_index.Set(dangerous_pattern); | 262 tls_index.Set(dangerous_pattern); |
261 } | 263 } |
262 dangerous_pattern->reset(label_string); | 264 dangerous_pattern->reset(label_string); |
263 return !dangerous_pattern->find(); | 265 return !dangerous_pattern->find(); |
264 } | 266 } |
265 | 267 |
266 bool IDNSpoofChecker::SimilarToTopDomains(base::StringPiece16 hostname) { | 268 bool IDNSpoofChecker::SimilarToTopDomains(base::StringPiece16 hostname) { |
(...skipping 116 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
383 allowed_set.remove(0x0F8Cu); | 385 allowed_set.remove(0x0F8Cu); |
384 allowed_set.remove(0x0F8Du); | 386 allowed_set.remove(0x0F8Du); |
385 allowed_set.remove(0x0F8Eu); | 387 allowed_set.remove(0x0F8Eu); |
386 allowed_set.remove(0x0F8Fu); | 388 allowed_set.remove(0x0F8Fu); |
387 #endif | 389 #endif |
388 | 390 |
389 uspoof_setAllowedUnicodeSet(checker_, &allowed_set, status); | 391 uspoof_setAllowedUnicodeSet(checker_, &allowed_set, status); |
390 } | 392 } |
391 | 393 |
392 } // namespace url_formatter | 394 } // namespace url_formatter |
OLD | NEW |