Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2017 The Chromium Authors. All rights reserved. | 1 // Copyright 2017 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "components/url_formatter/idn_spoof_checker.h" | 5 #include "components/url_formatter/idn_spoof_checker.h" |
| 6 | 6 |
| 7 #include "base/numerics/safe_conversions.h" | 7 #include "base/numerics/safe_conversions.h" |
| 8 #include "base/strings/string_split.h" | 8 #include "base/strings/string_split.h" |
| 9 #include "base/strings/string_util.h" | 9 #include "base/strings/string_util.h" |
| 10 #include "base/threading/thread_local_storage.h" | 10 #include "base/threading/thread_local_storage.h" |
| (...skipping 149 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 160 // Prolonged Sound) used out-of-context. | 160 // Prolonged Sound) used out-of-context. |
| 161 // - Disallow U+30FD/E (Katakana iteration mark/voiced iteration mark) | 161 // - Disallow U+30FD/E (Katakana iteration mark/voiced iteration mark) |
| 162 // unless they're preceded by a Katakana. | 162 // unless they're preceded by a Katakana. |
| 163 // - Disallow three Hiragana letters (U+307[8-A]) or Katakana letters | 163 // - Disallow three Hiragana letters (U+307[8-A]) or Katakana letters |
| 164 // (U+30D[8-A]) that look exactly like each other when they're used in a | 164 // (U+30D[8-A]) that look exactly like each other when they're used in a |
| 165 // label otherwise entirely in Katakana or Hiragana. | 165 // label otherwise entirely in Katakana or Hiragana. |
| 166 // - Disallow U+0585 (Armenian Small Letter Oh) and U+0581 (Armenian Small | 166 // - Disallow U+0585 (Armenian Small Letter Oh) and U+0581 (Armenian Small |
| 167 // Letter Co) to be next to Latin. | 167 // Letter Co) to be next to Latin. |
| 168 // - Disallow Latin 'o' and 'g' next to Armenian. | 168 // - Disallow Latin 'o' and 'g' next to Armenian. |
| 169 // - Disallow mixing of Latin and Canadian Syllabary. | 169 // - Disallow mixing of Latin and Canadian Syllabary. |
| 170 // - Disallow mixing of Latin and Tifinagh. | |
|
Peter Kasting
2017/05/22 18:47:46
Are there other "Latin + <script>" combos we shoul
| |
| 170 dangerous_pattern = new icu::RegexMatcher( | 171 dangerous_pattern = new icu::RegexMatcher( |
| 171 icu::UnicodeString( | 172 icu::UnicodeString( |
| 172 R"([^\p{scx=kana}\p{scx=hira}\p{scx=hani}])" | 173 R"([^\p{scx=kana}\p{scx=hira}\p{scx=hani}])" |
| 173 R"([\u30ce\u30f3\u30bd\u30be])" | 174 R"([\u30ce\u30f3\u30bd\u30be])" |
| 174 R"([^\p{scx=kana}\p{scx=hira}\p{scx=hani}]|)" | 175 R"([^\p{scx=kana}\p{scx=hira}\p{scx=hani}]|)" |
| 175 R"([^\p{scx=kana}\p{scx=hira}]\u30fc|^\u30fc|)" | 176 R"([^\p{scx=kana}\p{scx=hira}]\u30fc|^\u30fc|)" |
| 176 R"([^\p{scx=kana}][\u30fd\u30fe]|^[\u30fd\u30fe]|)" | 177 R"([^\p{scx=kana}][\u30fd\u30fe]|^[\u30fd\u30fe]|)" |
| 177 R"(^[\p{scx=kana}]+[\u3078-\u307a][\p{scx=kana}]+$|)" | 178 R"(^[\p{scx=kana}]+[\u3078-\u307a][\p{scx=kana}]+$|)" |
| 178 R"(^[\p{scx=hira}]+[\u30d8-\u30da][\p{scx=hira}]+$|)" | 179 R"(^[\p{scx=hira}]+[\u30d8-\u30da][\p{scx=hira}]+$|)" |
| 179 R"([a-z]\u30fb|\u30fb[a-z]|)" | 180 R"([a-z]\u30fb|\u30fb[a-z]|)" |
| 180 R"(^[\u0585\u0581]+[a-z]|[a-z][\u0585\u0581]+$|)" | 181 R"(^[\u0585\u0581]+[a-z]|[a-z][\u0585\u0581]+$|)" |
| 181 R"([a-z][\u0585\u0581]+[a-z]|)" | 182 R"([a-z][\u0585\u0581]+[a-z]|)" |
| 182 R"(^[og]+[\p{scx=armn}]|[\p{scx=armn}][og]+$|)" | 183 R"(^[og]+[\p{scx=armn}]|[\p{scx=armn}][og]+$|)" |
| 183 R"([\p{scx=armn}][og]+[\p{scx=armn}]|)" | 184 R"([\p{scx=armn}][og]+[\p{scx=armn}]|)" |
| 184 R"([\p{sc=cans}].*[a-z]|[a-z].*[\p{sc=cans}])", | 185 R"([\p{sc=cans}].*[a-z]|[a-z].*[\p{sc=cans}]|)" |
| 186 R"([\p{sc=tfng}].*[a-z]|[a-z].*[\p{sc=tfng}])", | |
|
Peter Kasting
2017/05/22 18:47:46
Nit: Feels like there's maybe a less redundant way
| |
| 185 -1, US_INV), | 187 -1, US_INV), |
| 186 0, status); | 188 0, status); |
| 187 tls_index.Set(dangerous_pattern); | 189 tls_index.Set(dangerous_pattern); |
| 188 } | 190 } |
| 189 dangerous_pattern->reset(label_string); | 191 dangerous_pattern->reset(label_string); |
| 190 return !dangerous_pattern->find(); | 192 return !dangerous_pattern->find(); |
| 191 } | 193 } |
| 192 | 194 |
| 193 bool IDNSpoofChecker::IsMadeOfLatinAlikeCyrillic( | 195 bool IDNSpoofChecker::IsMadeOfLatinAlikeCyrillic( |
| 194 const icu::UnicodeString& label) { | 196 const icu::UnicodeString& label) { |
| (...skipping 92 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 287 allowed_set.remove(0x0F8Cu); | 289 allowed_set.remove(0x0F8Cu); |
| 288 allowed_set.remove(0x0F8Du); | 290 allowed_set.remove(0x0F8Du); |
| 289 allowed_set.remove(0x0F8Eu); | 291 allowed_set.remove(0x0F8Eu); |
| 290 allowed_set.remove(0x0F8Fu); | 292 allowed_set.remove(0x0F8Fu); |
| 291 #endif | 293 #endif |
| 292 | 294 |
| 293 uspoof_setAllowedUnicodeSet(checker_, &allowed_set, status); | 295 uspoof_setAllowedUnicodeSet(checker_, &allowed_set, status); |
| 294 } | 296 } |
| 295 | 297 |
| 296 } // namespace url_formatter | 298 } // namespace url_formatter |
| OLD | NEW |