 Chromium Code Reviews
 Chromium Code Reviews Issue 2894313002:
  Block Tifinagh + Latin mix  (Closed)
    
  
    Issue 2894313002:
  Block Tifinagh + Latin mix  (Closed) 
  | OLD | NEW | 
|---|---|
| 1 // Copyright 2017 The Chromium Authors. All rights reserved. | 1 // Copyright 2017 The Chromium Authors. All rights reserved. | 
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be | 
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. | 
| 4 | 4 | 
| 5 #include "components/url_formatter/idn_spoof_checker.h" | 5 #include "components/url_formatter/idn_spoof_checker.h" | 
| 6 | 6 | 
| 7 #include "base/numerics/safe_conversions.h" | 7 #include "base/numerics/safe_conversions.h" | 
| 8 #include "base/strings/string_split.h" | 8 #include "base/strings/string_split.h" | 
| 9 #include "base/strings/string_util.h" | 9 #include "base/strings/string_util.h" | 
| 10 #include "base/threading/thread_local_storage.h" | 10 #include "base/threading/thread_local_storage.h" | 
| (...skipping 149 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 160 // Prolonged Sound) used out-of-context. | 160 // Prolonged Sound) used out-of-context. | 
| 161 // - Disallow U+30FD/E (Katakana iteration mark/voiced iteration mark) | 161 // - Disallow U+30FD/E (Katakana iteration mark/voiced iteration mark) | 
| 162 // unless they're preceded by a Katakana. | 162 // unless they're preceded by a Katakana. | 
| 163 // - Disallow three Hiragana letters (U+307[8-A]) or Katakana letters | 163 // - Disallow three Hiragana letters (U+307[8-A]) or Katakana letters | 
| 164 // (U+30D[8-A]) that look exactly like each other when they're used in a | 164 // (U+30D[8-A]) that look exactly like each other when they're used in a | 
| 165 // label otherwise entirely in Katakana or Hiragana. | 165 // label otherwise entirely in Katakana or Hiragana. | 
| 166 // - Disallow U+0585 (Armenian Small Letter Oh) and U+0581 (Armenian Small | 166 // - Disallow U+0585 (Armenian Small Letter Oh) and U+0581 (Armenian Small | 
| 167 // Letter Co) to be next to Latin. | 167 // Letter Co) to be next to Latin. | 
| 168 // - Disallow Latin 'o' and 'g' next to Armenian. | 168 // - Disallow Latin 'o' and 'g' next to Armenian. | 
| 169 // - Disallow mixing of Latin and Canadian Syllabary. | 169 // - Disallow mixing of Latin and Canadian Syllabary. | 
| 170 // - Disallow mixing of Latin and Tifinagh. | |
| 
Peter Kasting
2017/05/22 18:47:46
Are there other "Latin + <script>" combos we shoul
 | |
| 170 dangerous_pattern = new icu::RegexMatcher( | 171 dangerous_pattern = new icu::RegexMatcher( | 
| 171 icu::UnicodeString( | 172 icu::UnicodeString( | 
| 172 R"([^\p{scx=kana}\p{scx=hira}\p{scx=hani}])" | 173 R"([^\p{scx=kana}\p{scx=hira}\p{scx=hani}])" | 
| 173 R"([\u30ce\u30f3\u30bd\u30be])" | 174 R"([\u30ce\u30f3\u30bd\u30be])" | 
| 174 R"([^\p{scx=kana}\p{scx=hira}\p{scx=hani}]|)" | 175 R"([^\p{scx=kana}\p{scx=hira}\p{scx=hani}]|)" | 
| 175 R"([^\p{scx=kana}\p{scx=hira}]\u30fc|^\u30fc|)" | 176 R"([^\p{scx=kana}\p{scx=hira}]\u30fc|^\u30fc|)" | 
| 176 R"([^\p{scx=kana}][\u30fd\u30fe]|^[\u30fd\u30fe]|)" | 177 R"([^\p{scx=kana}][\u30fd\u30fe]|^[\u30fd\u30fe]|)" | 
| 177 R"(^[\p{scx=kana}]+[\u3078-\u307a][\p{scx=kana}]+$|)" | 178 R"(^[\p{scx=kana}]+[\u3078-\u307a][\p{scx=kana}]+$|)" | 
| 178 R"(^[\p{scx=hira}]+[\u30d8-\u30da][\p{scx=hira}]+$|)" | 179 R"(^[\p{scx=hira}]+[\u30d8-\u30da][\p{scx=hira}]+$|)" | 
| 179 R"([a-z]\u30fb|\u30fb[a-z]|)" | 180 R"([a-z]\u30fb|\u30fb[a-z]|)" | 
| 180 R"(^[\u0585\u0581]+[a-z]|[a-z][\u0585\u0581]+$|)" | 181 R"(^[\u0585\u0581]+[a-z]|[a-z][\u0585\u0581]+$|)" | 
| 181 R"([a-z][\u0585\u0581]+[a-z]|)" | 182 R"([a-z][\u0585\u0581]+[a-z]|)" | 
| 182 R"(^[og]+[\p{scx=armn}]|[\p{scx=armn}][og]+$|)" | 183 R"(^[og]+[\p{scx=armn}]|[\p{scx=armn}][og]+$|)" | 
| 183 R"([\p{scx=armn}][og]+[\p{scx=armn}]|)" | 184 R"([\p{scx=armn}][og]+[\p{scx=armn}]|)" | 
| 184 R"([\p{sc=cans}].*[a-z]|[a-z].*[\p{sc=cans}])", | 185 R"([\p{sc=cans}].*[a-z]|[a-z].*[\p{sc=cans}]|)" | 
| 186 R"([\p{sc=tfng}].*[a-z]|[a-z].*[\p{sc=tfng}])", | |
| 
Peter Kasting
2017/05/22 18:47:46
Nit: Feels like there's maybe a less redundant way
 | |
| 185 -1, US_INV), | 187 -1, US_INV), | 
| 186 0, status); | 188 0, status); | 
| 187 tls_index.Set(dangerous_pattern); | 189 tls_index.Set(dangerous_pattern); | 
| 188 } | 190 } | 
| 189 dangerous_pattern->reset(label_string); | 191 dangerous_pattern->reset(label_string); | 
| 190 return !dangerous_pattern->find(); | 192 return !dangerous_pattern->find(); | 
| 191 } | 193 } | 
| 192 | 194 | 
| 193 bool IDNSpoofChecker::IsMadeOfLatinAlikeCyrillic( | 195 bool IDNSpoofChecker::IsMadeOfLatinAlikeCyrillic( | 
| 194 const icu::UnicodeString& label) { | 196 const icu::UnicodeString& label) { | 
| (...skipping 92 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 287 allowed_set.remove(0x0F8Cu); | 289 allowed_set.remove(0x0F8Cu); | 
| 288 allowed_set.remove(0x0F8Du); | 290 allowed_set.remove(0x0F8Du); | 
| 289 allowed_set.remove(0x0F8Eu); | 291 allowed_set.remove(0x0F8Eu); | 
| 290 allowed_set.remove(0x0F8Fu); | 292 allowed_set.remove(0x0F8Fu); | 
| 291 #endif | 293 #endif | 
| 292 | 294 | 
| 293 uspoof_setAllowedUnicodeSet(checker_, &allowed_set, status); | 295 uspoof_setAllowedUnicodeSet(checker_, &allowed_set, status); | 
| 294 } | 296 } | 
| 295 | 297 | 
| 296 } // namespace url_formatter | 298 } // namespace url_formatter | 
| OLD | NEW |