| OLD | NEW |
| 1 // Copyright 2017 The Chromium Authors. All rights reserved. | 1 // Copyright 2017 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "components/url_formatter/idn_spoof_checker.h" | 5 #include "components/url_formatter/idn_spoof_checker.h" |
| 6 | 6 |
| 7 #include "base/numerics/safe_conversions.h" | 7 #include "base/numerics/safe_conversions.h" |
| 8 #include "base/strings/string_split.h" | 8 #include "base/strings/string_split.h" |
| 9 #include "base/strings/string_util.h" | 9 #include "base/strings/string_util.h" |
| 10 #include "base/threading/thread_local_storage.h" | 10 #include "base/threading/thread_local_storage.h" |
| 11 #include "net/base/lookup_string_in_fixed_set.h" |
| 11 #include "third_party/icu/source/common/unicode/schriter.h" | 12 #include "third_party/icu/source/common/unicode/schriter.h" |
| 12 #include "third_party/icu/source/common/unicode/unistr.h" | 13 #include "third_party/icu/source/common/unicode/unistr.h" |
| 13 #include "third_party/icu/source/i18n/unicode/regex.h" | 14 #include "third_party/icu/source/i18n/unicode/regex.h" |
| 15 #include "third_party/icu/source/i18n/unicode/translit.h" |
| 14 #include "third_party/icu/source/i18n/unicode/uspoof.h" | 16 #include "third_party/icu/source/i18n/unicode/uspoof.h" |
| 15 | 17 |
| 16 namespace url_formatter { | 18 namespace url_formatter { |
| 17 | 19 |
| 18 namespace { | 20 namespace { |
| 19 base::ThreadLocalStorage::StaticSlot tls_index = TLS_INITIALIZER; | 21 base::ThreadLocalStorage::StaticSlot tls_index = TLS_INITIALIZER; |
| 20 | 22 |
| 21 void OnThreadTermination(void* regex_matcher) { | 23 void OnThreadTermination(void* regex_matcher) { |
| 22 delete reinterpret_cast<icu::RegexMatcher*>(regex_matcher); | 24 delete reinterpret_cast<icu::RegexMatcher*>(regex_matcher); |
| 23 } | 25 } |
| 24 | 26 |
| 27 #include "components/url_formatter/top_domains/alexa_skeletons-inc.cc" |
| 28 // All the domains in the above file have 3 or fewer labels. |
| 29 const size_t kNumberOfLabelsToCheck = 3; |
| 30 |
| 31 bool LookupMatchInTopDomains(base::StringPiece skeleton) { |
| 32 DCHECK_NE(skeleton.back(), '.'); |
| 33 auto labels = base::SplitStringPiece(skeleton, ".", base::KEEP_WHITESPACE, |
| 34 base::SPLIT_WANT_ALL); |
| 35 |
| 36 if (labels.size() > kNumberOfLabelsToCheck) { |
| 37 labels.erase(labels.begin(), |
| 38 labels.begin() + labels.size() - kNumberOfLabelsToCheck); |
| 39 } |
| 40 |
| 41 while (labels.size() > 1) { |
| 42 std::string partial_skeleton = base::JoinString(labels, "."); |
| 43 if (net::LookupStringInFixedSet( |
| 44 kDafsa, arraysize(kDafsa), partial_skeleton.data(), |
| 45 partial_skeleton.length()) != net::kDafsaNotFound) |
| 46 return true; |
| 47 labels.erase(labels.begin()); |
| 48 } |
| 49 return false; |
| 50 } |
| 51 |
| 25 } // namespace | 52 } // namespace |
| 26 | 53 |
| 27 IDNSpoofChecker::IDNSpoofChecker() { | 54 IDNSpoofChecker::IDNSpoofChecker() { |
| 28 UErrorCode status = U_ZERO_ERROR; | 55 UErrorCode status = U_ZERO_ERROR; |
| 29 checker_ = uspoof_open(&status); | 56 checker_ = uspoof_open(&status); |
| 30 if (U_FAILURE(status)) { | 57 if (U_FAILURE(status)) { |
| 31 checker_ = nullptr; | 58 checker_ = nullptr; |
| 32 return; | 59 return; |
| 33 } | 60 } |
| 34 | 61 |
| (...skipping 26 matching lines...) Expand all Loading... |
| 61 UNICODE_STRING_SIMPLE("[\\u00df\\u03c2\\u200c\\u200d]"), status); | 88 UNICODE_STRING_SIMPLE("[\\u00df\\u03c2\\u200c\\u200d]"), status); |
| 62 deviation_characters_.freeze(); | 89 deviation_characters_.freeze(); |
| 63 | 90 |
| 64 // Latin letters outside ASCII. 'Script_Extensions=Latin' is not necessary | 91 // Latin letters outside ASCII. 'Script_Extensions=Latin' is not necessary |
| 65 // because additional characters pulled in with scx=Latn are not included in | 92 // because additional characters pulled in with scx=Latn are not included in |
| 66 // the allowed set. | 93 // the allowed set. |
| 67 non_ascii_latin_letters_ = | 94 non_ascii_latin_letters_ = |
| 68 icu::UnicodeSet(UNICODE_STRING_SIMPLE("[[:Latin:] - [a-zA-Z]]"), status); | 95 icu::UnicodeSet(UNICODE_STRING_SIMPLE("[[:Latin:] - [a-zA-Z]]"), status); |
| 69 non_ascii_latin_letters_.freeze(); | 96 non_ascii_latin_letters_.freeze(); |
| 70 | 97 |
| 71 // These letters are parts of |dangerous_patterns_|. | 98 // The following two sets are parts of |dangerous_patterns_|. |
| 72 kana_letters_exceptions_ = icu::UnicodeSet( | 99 kana_letters_exceptions_ = icu::UnicodeSet( |
| 73 UNICODE_STRING_SIMPLE("[\\u3078-\\u307a\\u30d8-\\u30da\\u30fb-\\u30fe]"), | 100 UNICODE_STRING_SIMPLE("[\\u3078-\\u307a\\u30d8-\\u30da\\u30fb-\\u30fe]"), |
| 74 status); | 101 status); |
| 75 kana_letters_exceptions_.freeze(); | 102 kana_letters_exceptions_.freeze(); |
| 103 combining_diacritics_exceptions_ = |
| 104 icu::UnicodeSet(UNICODE_STRING_SIMPLE("[\\u0300-\\u0339]"), status); |
| 105 combining_diacritics_exceptions_.freeze(); |
| 76 | 106 |
| 77 // These Cyrillic letters look like Latin. A domain label entirely made of | 107 // These Cyrillic letters look like Latin. A domain label entirely made of |
| 78 // these letters is blocked as a simplified whole-script-spoofable. | 108 // these letters is blocked as a simplified whole-script-spoofable. |
| 79 cyrillic_letters_latin_alike_ = | 109 cyrillic_letters_latin_alike_ = |
| 80 icu::UnicodeSet(icu::UnicodeString("[асԁеһіјӏорԛѕԝхуъЬҽпгѵѡ]"), status); | 110 icu::UnicodeSet(icu::UnicodeString("[асԁеһіјӏорԛѕԝхуъЬҽпгѵѡ]"), status); |
| 81 cyrillic_letters_latin_alike_.freeze(); | 111 cyrillic_letters_latin_alike_.freeze(); |
| 82 | 112 |
| 83 cyrillic_letters_ = | 113 cyrillic_letters_ = |
| 84 icu::UnicodeSet(UNICODE_STRING_SIMPLE("[[:Cyrl:]]"), status); | 114 icu::UnicodeSet(UNICODE_STRING_SIMPLE("[[:Cyrl:]]"), status); |
| 85 cyrillic_letters_.freeze(); | 115 cyrillic_letters_.freeze(); |
| 86 | 116 |
| 87 DCHECK(U_SUCCESS(status)); | 117 DCHECK(U_SUCCESS(status)); |
| 118 // This set is used to determine whether or not to apply a slow |
| 119 // transliteration to remove diacritics to a given hostname before the |
| 120 // confusable skeleton calculation for comparison with top domain names. If |
| 121 // it has any character outside the set, the expensive step will be skipped |
| 122 // because it cannot match any of top domain names. |
| 123 // The last set ([\u0300-\u0339]) is a shorthand for "[:Identifier_Status=Allowed:] |
| 124 // & [:Script_Extensions=Inherited:] - [\\u200C\\u200D]". The latter is a |
| 125 // subset of the former but it does not matter because hostnames with |
| 126 // characters outside the latter set would be rejected in an earlier step. |
| 127 lgc_letters_n_ascii_ = icu::UnicodeSet( |
| 128 UNICODE_STRING_SIMPLE("[[:Latin:][:Greek:][:Cyrillic:][0-9\\u002e_" |
| 129 "\\u002d][\\u0300-\\u0339]]"), |
| 130 status); |
| 131 lgc_letters_n_ascii_.freeze(); |
| 132 |
| 133 // Used for diacritics-removal before the skeleton calculation. Add |
| 134 // "ł > l; ø > o; đ > d" that are not handled by "NFD; Nonspacing mark |
| 135 // removal; NFC". On top of that, supplement the Unicode confusable list by |
| 136 // replacing {U+043A (к), U+0138(ĸ), U+03BA(κ)}, U+04CF (ӏ) and U+043F(п) by |
| 137 // 'k', 'l' and 'n', respectively. |
| 138 // TODO(jshin): Revisit "ł > l; ø > o" mapping. |
| 139 UParseError parse_error; |
| 140 transliterator_.reset(icu::Transliterator::createFromRules( |
| 141 UNICODE_STRING_SIMPLE("DropAcc"), |
| 142 icu::UnicodeString("::NFD; ::[:Nonspacing Mark:] Remove; ::NFC;" |
| 143 " ł > l; ø > o; đ > d; ӏ > l; [кĸκ] > k; п > n;"), |
| 144 UTRANS_FORWARD, parse_error, status)); |
| 145 DCHECK(U_SUCCESS(status)) |
| 146 << "Spoofchecker initalization failed due to an error: " |
| 147 << u_errorName(status); |
| 88 } | 148 } |
| 89 | 149 |
| 90 IDNSpoofChecker::~IDNSpoofChecker() { | 150 IDNSpoofChecker::~IDNSpoofChecker() { |
| 91 uspoof_close(checker_); | 151 uspoof_close(checker_); |
| 92 } | 152 } |
| 93 | 153 |
| 94 bool IDNSpoofChecker::SafeToDisplayAsUnicode(base::StringPiece16 label, | 154 bool IDNSpoofChecker::SafeToDisplayAsUnicode(base::StringPiece16 label, |
| 95 bool is_tld_ascii) { | 155 bool is_tld_ascii) { |
| 96 UErrorCode status = U_ZERO_ERROR; | 156 UErrorCode status = U_ZERO_ERROR; |
| 97 int32_t result = | 157 int32_t result = |
| (...skipping 15 matching lines...) Expand all Loading... |
| 113 // "UTS 46 section 4 Processing step 4" applies validity criteria for | 173 // "UTS 46 section 4 Processing step 4" applies validity criteria for |
| 114 // non-transitional processing (i.e. do not map deviation characters) to any | 174 // non-transitional processing (i.e. do not map deviation characters) to any |
| 115 // punycode labels regardless of whether transitional or non-transitional is | 175 // punycode labels regardless of whether transitional or non-transitional is |
| 116 // chosen. On the other hand, 'fu<sharp-s>' typed or copy and pasted | 176 // chosen. On the other hand, 'fu<sharp-s>' typed or copy and pasted |
| 117 // as Unicode would be canonicalized to 'fuss' by GURL and is displayed as | 177 // as Unicode would be canonicalized to 'fuss' by GURL and is displayed as |
| 118 // such. See http://crbug.com/595263 . | 178 // such. See http://crbug.com/595263 . |
| 119 if (deviation_characters_.containsSome(label_string)) | 179 if (deviation_characters_.containsSome(label_string)) |
| 120 return false; | 180 return false; |
| 121 | 181 |
| 122 // If there's no script mixing, the input is regarded as safe without any | 182 // If there's no script mixing, the input is regarded as safe without any |
| 123 // extra check unless it contains Kana letter exceptions or it's made entirely | 183 // extra check unless it falls into one of three categories: |
| 124 // of Cyrillic letters that look like Latin letters. Note that the following | 184 // - contains Kana letter exceptions |
| 125 // combinations of scripts are treated as a 'logical' single script. | 185 // - the TLD is ASCII and the input is made entirely of Cyrillic letters |
| 186 // that look like Latin letters. |
| 187 // - it has combining diacritic marks. |
| 188 // Note that the following combinations of scripts are treated as a 'logical' |
| 189 // single script. |
| 126 // - Chinese: Han, Bopomofo, Common | 190 // - Chinese: Han, Bopomofo, Common |
| 127 // - Japanese: Han, Hiragana, Katakana, Common | 191 // - Japanese: Han, Hiragana, Katakana, Common |
| 128 // - Korean: Hangul, Han, Common | 192 // - Korean: Hangul, Han, Common |
| 129 result &= USPOOF_RESTRICTION_LEVEL_MASK; | 193 result &= USPOOF_RESTRICTION_LEVEL_MASK; |
| 130 if (result == USPOOF_ASCII) | 194 if (result == USPOOF_ASCII) |
| 131 return true; | 195 return true; |
| 132 if (result == USPOOF_SINGLE_SCRIPT_RESTRICTIVE && | 196 if (result == USPOOF_SINGLE_SCRIPT_RESTRICTIVE && |
| 133 kana_letters_exceptions_.containsNone(label_string)) { | 197 kana_letters_exceptions_.containsNone(label_string) && |
| 198 combining_diacritics_exceptions_.containsNone(label_string)) { |
| 134 // Check Cyrillic confusable only for ASCII TLDs. | 199 // Check Cyrillic confusable only for ASCII TLDs. |
| 135 return !is_tld_ascii || !IsMadeOfLatinAlikeCyrillic(label_string); | 200 return !is_tld_ascii || !IsMadeOfLatinAlikeCyrillic(label_string); |
| 136 } | 201 } |
| 137 | 202 |
| 138 // Additional checks for |label| with multiple scripts, one of which is Latin. | 203 // Additional checks for |label| with multiple scripts, one of which is Latin. |
| 139 // Disallow non-ASCII Latin letters to mix with a non-Latin script. | 204 // Disallow non-ASCII Latin letters to mix with a non-Latin script. |
| 140 if (non_ascii_latin_letters_.containsSome(label_string)) | 205 // Note that the non-ASCII Latin check should not be applied when the entire |
| 206 // label is made of Latin. Checking with lgc_letters set here should be fine |
| 207 // because script mixing of LGC is already rejected. |
| 208 if (non_ascii_latin_letters_.containsSome(label_string) && |
| 209 !lgc_letters_n_ascii_.containsAll(label_string)) |
| 141 return false; | 210 return false; |
| 142 | 211 |
| 143 if (!tls_index.initialized()) | 212 if (!tls_index.initialized()) |
| 144 tls_index.Initialize(&OnThreadTermination); | 213 tls_index.Initialize(&OnThreadTermination); |
| 145 icu::RegexMatcher* dangerous_pattern = | 214 icu::RegexMatcher* dangerous_pattern = |
| 146 reinterpret_cast<icu::RegexMatcher*>(tls_index.Get()); | 215 reinterpret_cast<icu::RegexMatcher*>(tls_index.Get()); |
| 147 if (!dangerous_pattern) { | 216 if (!dangerous_pattern) { |
| 148 // Disallow the katakana no, so, zo, or n, as they may be mistaken for | 217 // Disallow the katakana no, so, zo, or n, as they may be mistaken for |
| 149 // slashes when they're surrounded by non-Japanese scripts (i.e. scripts | 218 // slashes when they're surrounded by non-Japanese scripts (i.e. scripts |
| 150 // other than Katakana, Hiragana or Han). If {no, so, zo, n} next to a | 219 // other than Katakana, Hiragana or Han). If {no, so, zo, n} next to a |
| 151 // non-Japanese script on either side is disallowed, legitimate cases like | 220 // non-Japanese script on either side is disallowed, legitimate cases like |
| 152 // '{vitamin in Katakana}b6' are blocked. Note that trying to block those | 221 // '{vitamin in Katakana}b6' are blocked. Note that trying to block those |
| 153 // characters when used alone as a label is futile because those cases | 222 // characters when used alone as a label is futile because those cases |
| 154 // would not reach here. | 223 // would not reach here. |
| 155 // Also disallow what used to be blocked by mixed-script-confusable (MSC) | 224 // Also disallow what used to be blocked by mixed-script-confusable (MSC) |
| 156 // detection. ICU 58 does not detect MSC any more for a single input string. | 225 // detection. ICU 58 does not detect MSC any more for a single input string. |
| 157 // See http://bugs.icu-project.org/trac/ticket/12823 . | 226 // See http://bugs.icu-project.org/trac/ticket/12823 . |
| 158 // TODO(jshin): adjust the pattern once the above ICU bug is fixed. | 227 // TODO(jshin): adjust the pattern once the above ICU bug is fixed. |
| 159 // - Disallow U+30FB (Katakana Middle Dot) and U+30FC (Hiragana-Katakana | 228 // - Disallow U+30FB (Katakana Middle Dot) and U+30FC (Hiragana-Katakana |
| 160 // Prolonged Sound) used out-of-context. | 229 // Prolonged Sound) used out-of-context. |
| 161 // - Disallow U+30FD/E (Katakana iteration mark/voiced iteration mark) | 230 // - Disallow U+30FD/E (Katakana iteration mark/voiced iteration mark) |
| 162 // unless they're preceded by a Katakana. | 231 // unless they're preceded by a Katakana. |
| 163 // - Disallow three Hiragana letters (U+307[8-A]) or Katakana letters | 232 // - Disallow three Hiragana letters (U+307[8-A]) or Katakana letters |
| 164 // (U+30D[8-A]) that look exactly like each other when they're used in a | 233 // (U+30D[8-A]) that look exactly like each other when they're used in a |
| 165 // label otherwise entirely in Katakana or Hiragana. | 234 // label otherwise entirely in Katakana or Hiragana. |
| 166 // - Disallow U+0585 (Armenian Small Letter Oh) and U+0581 (Armenian Small | 235 // - Disallow U+0585 (Armenian Small Letter Oh) and U+0581 (Armenian Small |
| 167 // Letter Co) to be next to Latin. | 236 // Letter Co) to be next to Latin. |
| 168 // - Disallow Latin 'o' and 'g' next to Armenian. | 237 // - Disallow Latin 'o' and 'g' next to Armenian. |
| 169 // - Disallow mixing of Latin and Canadian Syllabary. | 238 // - Disallow mixing of Latin and Canadian Syllabary. |
| 239 // - Disallow combining diacritical mark (U+0300-U+0339) after a non-LGC |
| 240 // character. Other combining diacritical marks are not in the allowed |
| 241 // character set. |
| 170 dangerous_pattern = new icu::RegexMatcher( | 242 dangerous_pattern = new icu::RegexMatcher( |
| 171 icu::UnicodeString( | 243 icu::UnicodeString( |
| 172 R"([^\p{scx=kana}\p{scx=hira}\p{scx=hani}])" | 244 R"([^\p{scx=kana}\p{scx=hira}\p{scx=hani}])" |
| 173 R"([\u30ce\u30f3\u30bd\u30be])" | 245 R"([\u30ce\u30f3\u30bd\u30be])" |
| 174 R"([^\p{scx=kana}\p{scx=hira}\p{scx=hani}]|)" | 246 R"([^\p{scx=kana}\p{scx=hira}\p{scx=hani}]|)" |
| 175 R"([^\p{scx=kana}\p{scx=hira}]\u30fc|^\u30fc|)" | 247 R"([^\p{scx=kana}\p{scx=hira}]\u30fc|^\u30fc|)" |
| 176 R"([^\p{scx=kana}][\u30fd\u30fe]|^[\u30fd\u30fe]|)" | 248 R"([^\p{scx=kana}][\u30fd\u30fe]|^[\u30fd\u30fe]|)" |
| 177 R"(^[\p{scx=kana}]+[\u3078-\u307a][\p{scx=kana}]+$|)" | 249 R"(^[\p{scx=kana}]+[\u3078-\u307a][\p{scx=kana}]+$|)" |
| 178 R"(^[\p{scx=hira}]+[\u30d8-\u30da][\p{scx=hira}]+$|)" | 250 R"(^[\p{scx=hira}]+[\u30d8-\u30da][\p{scx=hira}]+$|)" |
| 179 R"([a-z]\u30fb|\u30fb[a-z]|)" | 251 R"([a-z]\u30fb|\u30fb[a-z]|)" |
| 180 R"(^[\u0585\u0581]+[a-z]|[a-z][\u0585\u0581]+$|)" | 252 R"(^[\u0585\u0581]+[a-z]|[a-z][\u0585\u0581]+$|)" |
| 181 R"([a-z][\u0585\u0581]+[a-z]|)" | 253 R"([a-z][\u0585\u0581]+[a-z]|)" |
| 182 R"(^[og]+[\p{scx=armn}]|[\p{scx=armn}][og]+$|)" | 254 R"(^[og]+[\p{scx=armn}]|[\p{scx=armn}][og]+$|)" |
| 183 R"([\p{scx=armn}][og]+[\p{scx=armn}]|)" | 255 R"([\p{scx=armn}][og]+[\p{scx=armn}]|)" |
| 184 R"([\p{sc=cans}].*[a-z]|[a-z].*[\p{sc=cans}])", | 256 R"([\p{sc=cans}].*[a-z]|[a-z].*[\p{sc=cans}]|)" |
| 257 R"([^\p{scx=latn}\p{scx=grek}\p{scx=cyrl}][\u0300-\u0339])", |
| 185 -1, US_INV), | 258 -1, US_INV), |
| 186 0, status); | 259 0, status); |
| 187 tls_index.Set(dangerous_pattern); | 260 tls_index.Set(dangerous_pattern); |
| 188 } | 261 } |
| 189 dangerous_pattern->reset(label_string); | 262 dangerous_pattern->reset(label_string); |
| 190 return !dangerous_pattern->find(); | 263 return !dangerous_pattern->find(); |
| 191 } | 264 } |
| 192 | 265 |
| 266 bool IDNSpoofChecker::SimilarToTopDomains(base::StringPiece16 hostname) { |
| 267 size_t hostname_length = hostname.length() - (hostname.back() == '.' ? 1 : 0); |
| 268 icu::UnicodeString ustr_host(FALSE, hostname.data(), hostname_length); |
| 269 // If input has any characters outside Latin-Greek-Cyrillic and [0-9._-], |
| 270 // there is no point in getting rid of diacritics because combining marks |
| 271 // attached to non-LGC characters are already blocked. |
| 272 if (lgc_letters_n_ascii_.span(ustr_host, 0, USET_SPAN_CONTAINED) == |
| 273 ustr_host.length()) |
| 274 transliterator_.get()->transliterate(ustr_host); |
| 275 |
| 276 UErrorCode status = U_ZERO_ERROR; |
| 277 icu::UnicodeString ustr_skeleton; |
| 278 uspoof_getSkeletonUnicodeString(checker_, 0, ustr_host, ustr_skeleton, |
| 279 &status); |
| 280 if (U_FAILURE(status)) |
| 281 return false; |
| 282 std::string skeleton; |
| 283 ustr_skeleton.toUTF8String(skeleton); |
| 284 return LookupMatchInTopDomains(skeleton); |
| 285 } |
| 286 |
| 193 bool IDNSpoofChecker::IsMadeOfLatinAlikeCyrillic( | 287 bool IDNSpoofChecker::IsMadeOfLatinAlikeCyrillic( |
| 194 const icu::UnicodeString& label) { | 288 const icu::UnicodeString& label) { |
| 289 // Collect all the Cyrillic letters in |label| and see if they're |
| 290 // a subset of |cyrillic_letters_latin_alike_|. |
| 195 // A shortcut of defining cyrillic_letters_latin_alike_ to include [0-9] and | 291 // A shortcut of defining cyrillic_letters_latin_alike_ to include [0-9] and |
| 196 // [_-] and checking if the set contains all letters of |label_string| | 292 // [_-] and checking if the set contains all letters of |label| |
| 197 // would work in most cases, but not if a label has non-letters outside | 293 // would work in most cases, but not if a label has non-letters outside |
| 198 // ASCII. | 294 // ASCII. |
| 199 icu::UnicodeSet cyrillic_in_label; | 295 icu::UnicodeSet cyrillic_in_label; |
| 200 icu::StringCharacterIterator it(label); | 296 icu::StringCharacterIterator it(label); |
| 201 for (it.setToStart(); it.hasNext();) { | 297 for (it.setToStart(); it.hasNext();) { |
| 202 const UChar32 c = it.next32PostInc(); | 298 const UChar32 c = it.next32PostInc(); |
| 203 if (cyrillic_letters_.contains(c)) | 299 if (cyrillic_letters_.contains(c)) |
| 204 cyrillic_in_label.add(c); | 300 cyrillic_in_label.add(c); |
| 205 } | 301 } |
| 206 return !cyrillic_in_label.isEmpty() && | 302 return !cyrillic_in_label.isEmpty() && |
| (...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 287 allowed_set.remove(0x0F8Cu); | 383 allowed_set.remove(0x0F8Cu); |
| 288 allowed_set.remove(0x0F8Du); | 384 allowed_set.remove(0x0F8Du); |
| 289 allowed_set.remove(0x0F8Eu); | 385 allowed_set.remove(0x0F8Eu); |
| 290 allowed_set.remove(0x0F8Fu); | 386 allowed_set.remove(0x0F8Fu); |
| 291 #endif | 387 #endif |
| 292 | 388 |
| 293 uspoof_setAllowedUnicodeSet(checker_, &allowed_set, status); | 389 uspoof_setAllowedUnicodeSet(checker_, &allowed_set, status); |
| 294 } | 390 } |
| 295 | 391 |
| 296 } // namespace url_formatter | 392 } // namespace url_formatter |
| OLD | NEW |