| OLD | NEW |
| 1 // Copyright 2017 The Chromium Authors. All rights reserved. | 1 // Copyright 2017 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "components/url_formatter/idn_spoof_checker.h" | 5 #include "components/url_formatter/idn_spoof_checker.h" |
| 6 | 6 |
| 7 #include "base/numerics/safe_conversions.h" | 7 #include "base/numerics/safe_conversions.h" |
| 8 #include "base/strings/string_split.h" | 8 #include "base/strings/string_split.h" |
| 9 #include "base/strings/string_util.h" | 9 #include "base/strings/string_util.h" |
| 10 #include "base/threading/thread_local_storage.h" | 10 #include "base/threading/thread_local_storage.h" |
| (...skipping 218 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 229 // Prolonged Sound) used out-of-context. | 229 // Prolonged Sound) used out-of-context. |
| 230 // - Disallow U+30FD/E (Katakana iteration mark/voiced iteration mark) | 230 // - Disallow U+30FD/E (Katakana iteration mark/voiced iteration mark) |
| 231 // unless they're preceded by a Katakana. | 231 // unless they're preceded by a Katakana. |
| 232 // - Disallow three Hiragana letters (U+307[8-A]) or Katakana letters | 232 // - Disallow three Hiragana letters (U+307[8-A]) or Katakana letters |
| 233 // (U+30D[8-A]) that look exactly like each other when they're used in a | 233 // (U+30D[8-A]) that look exactly like each other when they're used in a |
| 234 // label otherwise entirely in Katakana or Hiragana. | 234 // label otherwise entirely in Katakana or Hiragana. |
| 235 // - Disallow U+0585 (Armenian Small Letter Oh) and U+0581 (Armenian Small | 235 // - Disallow U+0585 (Armenian Small Letter Oh) and U+0581 (Armenian Small |
| 236 // Letter Co) to be next to Latin. | 236 // Letter Co) to be next to Latin. |
| 237 // - Disallow Latin 'o' and 'g' next to Armenian. | 237 // - Disallow Latin 'o' and 'g' next to Armenian. |
| 238 // - Disallow mixing of Latin and Canadian Syllabary. | 238 // - Disallow mixing of Latin and Canadian Syllabary. |
| 239 // - Disallow mixing of Latin and Tifinagh. |
| 239 // - Disallow combining diacritical mark (U+0300-U+0339) after a non-LGC | 240 // - Disallow combining diacritical mark (U+0300-U+0339) after a non-LGC |
| 240 // character. Other combining diacritical marks are not in the allowed | 241 // character. Other combining diacritical marks are not in the allowed |
| 241 // character set. | 242 // character set. |
| 242 dangerous_pattern = new icu::RegexMatcher( | 243 dangerous_pattern = new icu::RegexMatcher( |
| 243 icu::UnicodeString( | 244 icu::UnicodeString( |
| 244 R"([^\p{scx=kana}\p{scx=hira}\p{scx=hani}])" | 245 R"([^\p{scx=kana}\p{scx=hira}\p{scx=hani}])" |
| 245 R"([\u30ce\u30f3\u30bd\u30be])" | 246 R"([\u30ce\u30f3\u30bd\u30be])" |
| 246 R"([^\p{scx=kana}\p{scx=hira}\p{scx=hani}]|)" | 247 R"([^\p{scx=kana}\p{scx=hira}\p{scx=hani}]|)" |
| 247 R"([^\p{scx=kana}\p{scx=hira}]\u30fc|^\u30fc|)" | 248 R"([^\p{scx=kana}\p{scx=hira}]\u30fc|^\u30fc|)" |
| 248 R"([^\p{scx=kana}][\u30fd\u30fe]|^[\u30fd\u30fe]|)" | 249 R"([^\p{scx=kana}][\u30fd\u30fe]|^[\u30fd\u30fe]|)" |
| 249 R"(^[\p{scx=kana}]+[\u3078-\u307a][\p{scx=kana}]+$|)" | 250 R"(^[\p{scx=kana}]+[\u3078-\u307a][\p{scx=kana}]+$|)" |
| 250 R"(^[\p{scx=hira}]+[\u30d8-\u30da][\p{scx=hira}]+$|)" | 251 R"(^[\p{scx=hira}]+[\u30d8-\u30da][\p{scx=hira}]+$|)" |
| 251 R"([a-z]\u30fb|\u30fb[a-z]|)" | 252 R"([a-z]\u30fb|\u30fb[a-z]|)" |
| 252 R"(^[\u0585\u0581]+[a-z]|[a-z][\u0585\u0581]+$|)" | 253 R"(^[\u0585\u0581]+[a-z]|[a-z][\u0585\u0581]+$|)" |
| 253 R"([a-z][\u0585\u0581]+[a-z]|)" | 254 R"([a-z][\u0585\u0581]+[a-z]|)" |
| 254 R"(^[og]+[\p{scx=armn}]|[\p{scx=armn}][og]+$|)" | 255 R"(^[og]+[\p{scx=armn}]|[\p{scx=armn}][og]+$|)" |
| 255 R"([\p{scx=armn}][og]+[\p{scx=armn}]|)" | 256 R"([\p{scx=armn}][og]+[\p{scx=armn}]|)" |
| 256 R"([\p{sc=cans}].*[a-z]|[a-z].*[\p{sc=cans}]|)" | 257 R"([\p{sc=cans}].*[a-z]|[a-z].*[\p{sc=cans}]|)" |
| 258 R"([\p{sc=tfng}].*[a-z]|[a-z].*[\p{sc=tfng}]|)" |
| 257 R"([^\p{scx=latn}\p{scx=grek}\p{scx=cyrl}][\u0300-\u0339])", | 259 R"([^\p{scx=latn}\p{scx=grek}\p{scx=cyrl}][\u0300-\u0339])", |
| 258 -1, US_INV), | 260 -1, US_INV), |
| 259 0, status); | 261 0, status); |
| 260 tls_index.Set(dangerous_pattern); | 262 tls_index.Set(dangerous_pattern); |
| 261 } | 263 } |
| 262 dangerous_pattern->reset(label_string); | 264 dangerous_pattern->reset(label_string); |
| 263 return !dangerous_pattern->find(); | 265 return !dangerous_pattern->find(); |
| 264 } | 266 } |
| 265 | 267 |
| 266 bool IDNSpoofChecker::SimilarToTopDomains(base::StringPiece16 hostname) { | 268 bool IDNSpoofChecker::SimilarToTopDomains(base::StringPiece16 hostname) { |
| (...skipping 116 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 383 allowed_set.remove(0x0F8Cu); | 385 allowed_set.remove(0x0F8Cu); |
| 384 allowed_set.remove(0x0F8Du); | 386 allowed_set.remove(0x0F8Du); |
| 385 allowed_set.remove(0x0F8Eu); | 387 allowed_set.remove(0x0F8Eu); |
| 386 allowed_set.remove(0x0F8Fu); | 388 allowed_set.remove(0x0F8Fu); |
| 387 #endif | 389 #endif |
| 388 | 390 |
| 389 uspoof_setAllowedUnicodeSet(checker_, &allowed_set, status); | 391 uspoof_setAllowedUnicodeSet(checker_, &allowed_set, status); |
| 390 } | 392 } |
| 391 | 393 |
| 392 } // namespace url_formatter | 394 } // namespace url_formatter |
| OLD | NEW |