| OLD | NEW |
| 1 // Copyright 2017 The Chromium Authors. All rights reserved. | 1 // Copyright 2017 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "components/url_formatter/idn_spoof_checker.h" | 5 #include "components/url_formatter/idn_spoof_checker.h" |
| 6 | 6 |
| 7 #include "base/numerics/safe_conversions.h" | 7 #include "base/numerics/safe_conversions.h" |
| 8 #include "base/strings/string_split.h" | 8 #include "base/strings/string_split.h" |
| 9 #include "base/strings/string_util.h" | 9 #include "base/strings/string_util.h" |
| 10 #include "base/threading/thread_local_storage.h" | 10 #include "base/threading/thread_local_storage.h" |
| 11 #include "net/base/lookup_string_in_fixed_set.h" | |
| 12 #include "third_party/icu/source/common/unicode/schriter.h" | 11 #include "third_party/icu/source/common/unicode/schriter.h" |
| 13 #include "third_party/icu/source/common/unicode/unistr.h" | 12 #include "third_party/icu/source/common/unicode/unistr.h" |
| 14 #include "third_party/icu/source/i18n/unicode/regex.h" | 13 #include "third_party/icu/source/i18n/unicode/regex.h" |
| 15 #include "third_party/icu/source/i18n/unicode/translit.h" | |
| 16 #include "third_party/icu/source/i18n/unicode/uspoof.h" | 14 #include "third_party/icu/source/i18n/unicode/uspoof.h" |
| 17 | 15 |
| 18 namespace url_formatter { | 16 namespace url_formatter { |
| 19 | 17 |
| 20 namespace { | 18 namespace { |
| 21 base::ThreadLocalStorage::StaticSlot tls_index = TLS_INITIALIZER; | 19 base::ThreadLocalStorage::StaticSlot tls_index = TLS_INITIALIZER; |
| 22 | 20 |
| 23 void OnThreadTermination(void* regex_matcher) { | 21 void OnThreadTermination(void* regex_matcher) { |
| 24 delete reinterpret_cast<icu::RegexMatcher*>(regex_matcher); | 22 delete reinterpret_cast<icu::RegexMatcher*>(regex_matcher); |
| 25 } | 23 } |
| 26 | 24 |
| 27 #include "components/url_formatter/top_domains/alexa_skeletons-inc.cc" | |
| 28 // All the domains in the above file have 3 or fewer labels. | |
| 29 const size_t kNumberOfLabelsToCheck = 3; | |
| 30 | |
| 31 bool LookupMatchInTopDomains(base::StringPiece skeleton) { | |
| 32 DCHECK_NE(skeleton.back(), '.'); | |
| 33 auto labels = base::SplitStringPiece(skeleton, ".", base::KEEP_WHITESPACE, | |
| 34 base::SPLIT_WANT_ALL); | |
| 35 | |
| 36 if (labels.size() > kNumberOfLabelsToCheck) { | |
| 37 labels.erase(labels.begin(), | |
| 38 labels.begin() + labels.size() - kNumberOfLabelsToCheck); | |
| 39 } | |
| 40 | |
| 41 while (labels.size() > 1) { | |
| 42 std::string partial_skeleton = base::JoinString(labels, "."); | |
| 43 if (net::LookupStringInFixedSet( | |
| 44 kDafsa, arraysize(kDafsa), partial_skeleton.data(), | |
| 45 partial_skeleton.length()) != net::kDafsaNotFound) | |
| 46 return true; | |
| 47 labels.erase(labels.begin()); | |
| 48 } | |
| 49 return false; | |
| 50 } | |
| 51 | |
| 52 } // namespace | 25 } // namespace |
| 53 | 26 |
| 54 IDNSpoofChecker::IDNSpoofChecker() { | 27 IDNSpoofChecker::IDNSpoofChecker() { |
| 55 UErrorCode status = U_ZERO_ERROR; | 28 UErrorCode status = U_ZERO_ERROR; |
| 56 checker_ = uspoof_open(&status); | 29 checker_ = uspoof_open(&status); |
| 57 if (U_FAILURE(status)) { | 30 if (U_FAILURE(status)) { |
| 58 checker_ = nullptr; | 31 checker_ = nullptr; |
| 59 return; | 32 return; |
| 60 } | 33 } |
| 61 | 34 |
| (...skipping 26 matching lines...) Expand all Loading... |
| 88 UNICODE_STRING_SIMPLE("[\\u00df\\u03c2\\u200c\\u200d]"), status); | 61 UNICODE_STRING_SIMPLE("[\\u00df\\u03c2\\u200c\\u200d]"), status); |
| 89 deviation_characters_.freeze(); | 62 deviation_characters_.freeze(); |
| 90 | 63 |
| 91 // Latin letters outside ASCII. 'Script_Extensions=Latin' is not necessary | 64 // Latin letters outside ASCII. 'Script_Extensions=Latin' is not necessary |
| 92 // because additional characters pulled in with scx=Latn are not included in | 65 // because additional characters pulled in with scx=Latn are not included in |
| 93 // the allowed set. | 66 // the allowed set. |
| 94 non_ascii_latin_letters_ = | 67 non_ascii_latin_letters_ = |
| 95 icu::UnicodeSet(UNICODE_STRING_SIMPLE("[[:Latin:] - [a-zA-Z]]"), status); | 68 icu::UnicodeSet(UNICODE_STRING_SIMPLE("[[:Latin:] - [a-zA-Z]]"), status); |
| 96 non_ascii_latin_letters_.freeze(); | 69 non_ascii_latin_letters_.freeze(); |
| 97 | 70 |
| 98 // The following two sets are parts of |dangerous_patterns_|. | 71 // These letters are parts of |dangerous_patterns_|. |
| 99 kana_letters_exceptions_ = icu::UnicodeSet( | 72 kana_letters_exceptions_ = icu::UnicodeSet( |
| 100 UNICODE_STRING_SIMPLE("[\\u3078-\\u307a\\u30d8-\\u30da\\u30fb-\\u30fe]"), | 73 UNICODE_STRING_SIMPLE("[\\u3078-\\u307a\\u30d8-\\u30da\\u30fb-\\u30fe]"), |
| 101 status); | 74 status); |
| 102 kana_letters_exceptions_.freeze(); | 75 kana_letters_exceptions_.freeze(); |
| 103 combining_diacritics_exceptions_ = | |
| 104 icu::UnicodeSet(UNICODE_STRING_SIMPLE("[\\u0300-\\u0339]"), status); | |
| 105 combining_diacritics_exceptions_.freeze(); | |
| 106 | 76 |
| 107 // These Cyrillic letters look like Latin. A domain label entirely made of | 77 // These Cyrillic letters look like Latin. A domain label entirely made of |
| 108 // these letters is blocked as a simplified whole-script-spoofable. | 78 // these letters is blocked as a simplified whole-script-spoofable. |
| 109 cyrillic_letters_latin_alike_ = | 79 cyrillic_letters_latin_alike_ = |
| 110 icu::UnicodeSet(icu::UnicodeString("[асԁеһіјӏорԛѕԝхуъЬҽпгѵѡ]"), status); | 80 icu::UnicodeSet(icu::UnicodeString("[асԁеһіјӏорԛѕԝхуъЬҽпгѵѡ]"), status); |
| 111 cyrillic_letters_latin_alike_.freeze(); | 81 cyrillic_letters_latin_alike_.freeze(); |
| 112 | 82 |
| 113 cyrillic_letters_ = | 83 cyrillic_letters_ = |
| 114 icu::UnicodeSet(UNICODE_STRING_SIMPLE("[[:Cyrl:]]"), status); | 84 icu::UnicodeSet(UNICODE_STRING_SIMPLE("[[:Cyrl:]]"), status); |
| 115 cyrillic_letters_.freeze(); | 85 cyrillic_letters_.freeze(); |
| 116 | 86 |
| 117 DCHECK(U_SUCCESS(status)); | 87 DCHECK(U_SUCCESS(status)); |
| 118 // This set is used to determine whether or not to apply a slow | |
| 119 // transliteration to remove diacritics to a given hostname before the | |
| 120 // confusable skeleton calculation for comparison with top domain names. If | |
| 121 // it has any character outside the set, the expensive step will be skipped | |
| 122 // because it cannot match any of top domain names. | |
| 123 // The last ([\u0300-\u0339]) is a shorthand for "[:Identifier_Status=Allowed:] | |
| 124 // & [:Script_Extensions=Inherited:] - [\\u200C\\u200D]". The latter is a | |
| 125 // subset of the former but it does not matter because hostnames with | |
| 126 // characters outside the latter set would be rejected in an earlier step. | |
| 127 lgc_letters_n_ascii_ = icu::UnicodeSet( | |
| 128 UNICODE_STRING_SIMPLE("[[:Latin:][:Greek:][:Cyrillic:][0-9\\u002e_" | |
| 129 "\\u002d][\\u0300-\\u0339]]"), | |
| 130 status); | |
| 131 lgc_letters_n_ascii_.freeze(); | |
| 132 | |
| 133 // Used for diacritics-removal before the skeleton calculation. Add | |
| 134 // "ł > l; ø > o; đ > d" that are not handled by "NFD; Nonspacing mark | |
| 135 // removal; NFC". On top of that, supplement the Unicode confusable list by | |
| 136 // replacing {U+043A (к), U+0138(ĸ), U+03BA(κ)}, U+04CF (ӏ) and U+043F(п) by | |
| 137 // 'k', 'l' and 'n', respectively. | |
| 138 // TODO(jshin): Revisit "ł > l; ø > o" mapping. | |
| 139 UParseError parse_error; | |
| 140 transliterator_.reset(icu::Transliterator::createFromRules( | |
| 141 UNICODE_STRING_SIMPLE("DropAcc"), | |
| 142 icu::UnicodeString("::NFD; ::[:Nonspacing Mark:] Remove; ::NFC;" | |
| 143 " ł > l; ø > o; đ > d; ӏ > l; [кĸκ] > k; п > n;"), | |
| 144 UTRANS_FORWARD, parse_error, status)); | |
| 145 DCHECK(U_SUCCESS(status)) | |
| 146 << "Spoofchecker initalization failed due to an error: " | |
| 147 << u_errorName(status); | |
| 148 } | 88 } |
| 149 | 89 |
| 150 IDNSpoofChecker::~IDNSpoofChecker() { | 90 IDNSpoofChecker::~IDNSpoofChecker() { |
| 151 uspoof_close(checker_); | 91 uspoof_close(checker_); |
| 152 } | 92 } |
| 153 | 93 |
| 154 bool IDNSpoofChecker::SafeToDisplayAsUnicode(base::StringPiece16 label, | 94 bool IDNSpoofChecker::SafeToDisplayAsUnicode(base::StringPiece16 label, |
| 155 bool is_tld_ascii) { | 95 bool is_tld_ascii) { |
| 156 UErrorCode status = U_ZERO_ERROR; | 96 UErrorCode status = U_ZERO_ERROR; |
| 157 int32_t result = | 97 int32_t result = |
| (...skipping 15 matching lines...) Expand all Loading... |
| 173 // "UTS 46 section 4 Processing step 4" applies validity criteria for | 113 // "UTS 46 section 4 Processing step 4" applies validity criteria for |
| 174 // non-transitional processing (i.e. do not map deviation characters) to any | 114 // non-transitional processing (i.e. do not map deviation characters) to any |
| 175 // punycode labels regardless of whether transitional or non-transitional is | 115 // punycode labels regardless of whether transitional or non-transitional is |
| 176 // chosen. On the other hand, 'fu<sharp-s>' typed or copy and pasted | 116 // chosen. On the other hand, 'fu<sharp-s>' typed or copy and pasted |
| 177 // as Unicode would be canonicalized to 'fuss' by GURL and is displayed as | 117 // as Unicode would be canonicalized to 'fuss' by GURL and is displayed as |
| 178 // such. See http://crbug.com/595263 . | 118 // such. See http://crbug.com/595263 . |
| 179 if (deviation_characters_.containsSome(label_string)) | 119 if (deviation_characters_.containsSome(label_string)) |
| 180 return false; | 120 return false; |
| 181 | 121 |
| 182 // If there's no script mixing, the input is regarded as safe without any | 122 // If there's no script mixing, the input is regarded as safe without any |
| 183 // extra check unless it falls into one of three categories: | 123 // extra check unless it contains Kana letter exceptions or it's made entirely |
| 184 // - contains Kana letter exceptions | 124 // of Cyrillic letters that look like Latin letters. Note that the following |
| 185 // - the TLD is ASCII and the input is made entirely of Cyrillic letters | 125 // combinations of scripts are treated as a 'logical' single script. |
| 186 // that look like Latin letters. | |
| 187 // - it has combining diacritic marks. | |
| 188 // Note that the following combinations of scripts are treated as a 'logical' | |
| 189 // single script. | |
| 190 // - Chinese: Han, Bopomofo, Common | 126 // - Chinese: Han, Bopomofo, Common |
| 191 // - Japanese: Han, Hiragana, Katakana, Common | 127 // - Japanese: Han, Hiragana, Katakana, Common |
| 192 // - Korean: Hangul, Han, Common | 128 // - Korean: Hangul, Han, Common |
| 193 result &= USPOOF_RESTRICTION_LEVEL_MASK; | 129 result &= USPOOF_RESTRICTION_LEVEL_MASK; |
| 194 if (result == USPOOF_ASCII) | 130 if (result == USPOOF_ASCII) |
| 195 return true; | 131 return true; |
| 196 if (result == USPOOF_SINGLE_SCRIPT_RESTRICTIVE && | 132 if (result == USPOOF_SINGLE_SCRIPT_RESTRICTIVE && |
| 197 kana_letters_exceptions_.containsNone(label_string) && | 133 kana_letters_exceptions_.containsNone(label_string)) { |
| 198 combining_diacritics_exceptions_.containsNone(label_string)) { | |
| 199 // Check Cyrillic confusable only for ASCII TLDs. | 134 // Check Cyrillic confusable only for ASCII TLDs. |
| 200 return !is_tld_ascii || !IsMadeOfLatinAlikeCyrillic(label_string); | 135 return !is_tld_ascii || !IsMadeOfLatinAlikeCyrillic(label_string); |
| 201 } | 136 } |
| 202 | 137 |
| 203 // Additional checks for |label| with multiple scripts, one of which is Latin. | 138 // Additional checks for |label| with multiple scripts, one of which is Latin. |
| 204 // Disallow non-ASCII Latin letters to mix with a non-Latin script. | 139 // Disallow non-ASCII Latin letters to mix with a non-Latin script. |
| 205 // Note that the non-ASCII Latin check should not be applied when the entire | 140 if (non_ascii_latin_letters_.containsSome(label_string)) |
| 206 // label is made of Latin. Checking with lgc_letters set here should be fine | |
| 207 // because script mixing of LGC is already rejected. | |
| 208 if (non_ascii_latin_letters_.containsSome(label_string) && | |
| 209 !lgc_letters_n_ascii_.containsAll(label_string)) | |
| 210 return false; | 141 return false; |
| 211 | 142 |
| 212 if (!tls_index.initialized()) | 143 if (!tls_index.initialized()) |
| 213 tls_index.Initialize(&OnThreadTermination); | 144 tls_index.Initialize(&OnThreadTermination); |
| 214 icu::RegexMatcher* dangerous_pattern = | 145 icu::RegexMatcher* dangerous_pattern = |
| 215 reinterpret_cast<icu::RegexMatcher*>(tls_index.Get()); | 146 reinterpret_cast<icu::RegexMatcher*>(tls_index.Get()); |
| 216 if (!dangerous_pattern) { | 147 if (!dangerous_pattern) { |
| 217 // Disallow the katakana no, so, zo, or n, as they may be mistaken for | 148 // Disallow the katakana no, so, zo, or n, as they may be mistaken for |
| 218 // slashes when they're surrounded by non-Japanese scripts (i.e. scripts | 149 // slashes when they're surrounded by non-Japanese scripts (i.e. scripts |
| 219 // other than Katakana, Hiragana or Han). If {no, so, zo, n} next to a | 150 // other than Katakana, Hiragana or Han). If {no, so, zo, n} next to a |
| 220 // non-Japanese script on either side is disallowed, legitimate cases like | 151 // non-Japanese script on either side is disallowed, legitimate cases like |
| 221 // '{vitamin in Katakana}b6' are blocked. Note that trying to block those | 152 // '{vitamin in Katakana}b6' are blocked. Note that trying to block those |
| 222 // characters when used alone as a label is futile because those cases | 153 // characters when used alone as a label is futile because those cases |
| 223 // would not reach here. | 154 // would not reach here. |
| 224 // Also disallow what used to be blocked by mixed-script-confusable (MSC) | 155 // Also disallow what used to be blocked by mixed-script-confusable (MSC) |
| 225 // detection. ICU 58 does not detect MSC any more for a single input string. | 156 // detection. ICU 58 does not detect MSC any more for a single input string. |
| 226 // See http://bugs.icu-project.org/trac/ticket/12823 . | 157 // See http://bugs.icu-project.org/trac/ticket/12823 . |
| 227 // TODO(jshin): adjust the pattern once the above ICU bug is fixed. | 158 // TODO(jshin): adjust the pattern once the above ICU bug is fixed. |
| 228 // - Disallow U+30FB (Katakana Middle Dot) and U+30FC (Hiragana-Katakana | 159 // - Disallow U+30FB (Katakana Middle Dot) and U+30FC (Hiragana-Katakana |
| 229 // Prolonged Sound) used out-of-context. | 160 // Prolonged Sound) used out-of-context. |
| 230 // - Disallow U+30FD/E (Katakana iteration mark/voiced iteration mark) | 161 // - Disallow U+30FD/E (Katakana iteration mark/voiced iteration mark) |
| 231 // unless they're preceded by a Katakana. | 162 // unless they're preceded by a Katakana. |
| 232 // - Disallow three Hiragana letters (U+307[8-A]) or Katakana letters | 163 // - Disallow three Hiragana letters (U+307[8-A]) or Katakana letters |
| 233 // (U+30D[8-A]) that look exactly like each other when they're used in a | 164 // (U+30D[8-A]) that look exactly like each other when they're used in a |
| 234 // label otherwise entirely in Katakana or Hiragana. | 165 // label otherwise entirely in Katakana or Hiragana. |
| 235 // - Disallow U+0585 (Armenian Small Letter Oh) and U+0581 (Armenian Small | 166 // - Disallow U+0585 (Armenian Small Letter Oh) and U+0581 (Armenian Small |
| 236 // Letter Co) to be next to Latin. | 167 // Letter Co) to be next to Latin. |
| 237 // - Disallow Latin 'o' and 'g' next to Armenian. | 168 // - Disallow Latin 'o' and 'g' next to Armenian. |
| 238 // - Disallow mixing of Latin and Canadian Syllabary. | 169 // - Disallow mixing of Latin and Canadian Syllabary. |
| 239 // - Disallow combining diacritical mark (U+0300-U+0339) after a non-LGC | |
| 240 // character. Other combining diacritical marks are not in the allowed | |
| 241 // character set. | |
| 242 dangerous_pattern = new icu::RegexMatcher( | 170 dangerous_pattern = new icu::RegexMatcher( |
| 243 icu::UnicodeString( | 171 icu::UnicodeString( |
| 244 R"([^\p{scx=kana}\p{scx=hira}\p{scx=hani}])" | 172 R"([^\p{scx=kana}\p{scx=hira}\p{scx=hani}])" |
| 245 R"([\u30ce\u30f3\u30bd\u30be])" | 173 R"([\u30ce\u30f3\u30bd\u30be])" |
| 246 R"([^\p{scx=kana}\p{scx=hira}\p{scx=hani}]|)" | 174 R"([^\p{scx=kana}\p{scx=hira}\p{scx=hani}]|)" |
| 247 R"([^\p{scx=kana}\p{scx=hira}]\u30fc|^\u30fc|)" | 175 R"([^\p{scx=kana}\p{scx=hira}]\u30fc|^\u30fc|)" |
| 248 R"([^\p{scx=kana}][\u30fd\u30fe]|^[\u30fd\u30fe]|)" | 176 R"([^\p{scx=kana}][\u30fd\u30fe]|^[\u30fd\u30fe]|)" |
| 249 R"(^[\p{scx=kana}]+[\u3078-\u307a][\p{scx=kana}]+$|)" | 177 R"(^[\p{scx=kana}]+[\u3078-\u307a][\p{scx=kana}]+$|)" |
| 250 R"(^[\p{scx=hira}]+[\u30d8-\u30da][\p{scx=hira}]+$|)" | 178 R"(^[\p{scx=hira}]+[\u30d8-\u30da][\p{scx=hira}]+$|)" |
| 251 R"([a-z]\u30fb|\u30fb[a-z]|)" | 179 R"([a-z]\u30fb|\u30fb[a-z]|)" |
| 252 R"(^[\u0585\u0581]+[a-z]|[a-z][\u0585\u0581]+$|)" | 180 R"(^[\u0585\u0581]+[a-z]|[a-z][\u0585\u0581]+$|)" |
| 253 R"([a-z][\u0585\u0581]+[a-z]|)" | 181 R"([a-z][\u0585\u0581]+[a-z]|)" |
| 254 R"(^[og]+[\p{scx=armn}]|[\p{scx=armn}][og]+$|)" | 182 R"(^[og]+[\p{scx=armn}]|[\p{scx=armn}][og]+$|)" |
| 255 R"([\p{scx=armn}][og]+[\p{scx=armn}]|)" | 183 R"([\p{scx=armn}][og]+[\p{scx=armn}]|)" |
| 256 R"([\p{sc=cans}].*[a-z]|[a-z].*[\p{sc=cans}]|)" | 184 R"([\p{sc=cans}].*[a-z]|[a-z].*[\p{sc=cans}])", |
| 257 R"([^\p{scx=latn}\p{scx=grek}\p{scx=cyrl}][\u0300-\u0339])", | |
| 258 -1, US_INV), | 185 -1, US_INV), |
| 259 0, status); | 186 0, status); |
| 260 tls_index.Set(dangerous_pattern); | 187 tls_index.Set(dangerous_pattern); |
| 261 } | 188 } |
| 262 dangerous_pattern->reset(label_string); | 189 dangerous_pattern->reset(label_string); |
| 263 return !dangerous_pattern->find(); | 190 return !dangerous_pattern->find(); |
| 264 } | 191 } |
| 265 | 192 |
| 266 bool IDNSpoofChecker::SimilarToTopDomains(base::StringPiece16 hostname) { | |
| 267 size_t hostname_length = hostname.length() - (hostname.back() == '.' ? 1 : 0); | |
| 268 icu::UnicodeString ustr_host(FALSE, hostname.data(), hostname_length); | |
| 269 // If input has any characters outside Latin-Greek-Cyrillic and [0-9._-], | |
| 270 // there is no point in getting rid of diacritics because combining marks | |
| 271 // attached to non-LGC characters are already blocked. | |
| 272 if (lgc_letters_n_ascii_.span(ustr_host, 0, USET_SPAN_CONTAINED) == | |
| 273 ustr_host.length()) | |
| 274 transliterator_.get()->transliterate(ustr_host); | |
| 275 | |
| 276 UErrorCode status = U_ZERO_ERROR; | |
| 277 icu::UnicodeString ustr_skeleton; | |
| 278 uspoof_getSkeletonUnicodeString(checker_, 0, ustr_host, ustr_skeleton, | |
| 279 &status); | |
| 280 if (U_FAILURE(status)) | |
| 281 return false; | |
| 282 std::string skeleton; | |
| 283 ustr_skeleton.toUTF8String(skeleton); | |
| 284 return LookupMatchInTopDomains(skeleton); | |
| 285 } | |
| 286 | |
| 287 bool IDNSpoofChecker::IsMadeOfLatinAlikeCyrillic( | 193 bool IDNSpoofChecker::IsMadeOfLatinAlikeCyrillic( |
| 288 const icu::UnicodeString& label) { | 194 const icu::UnicodeString& label) { |
| 289 // Collect all the Cyrillic letters in |label_string| and see if they're | |
| 290 // a subset of |cyrillic_letters_latin_alike_|. | |
| 291 // A shortcut of defining cyrillic_letters_latin_alike_ to include [0-9] and | 195 // A shortcut of defining cyrillic_letters_latin_alike_ to include [0-9] and |
| 292 // [_-] and checking if the set contains all letters of |label| | 196 // [_-] and checking if the set contains all letters of |label_string| |
| 293 // would work in most cases, but not if a label has non-letters outside | 197 // would work in most cases, but not if a label has non-letters outside |
| 294 // ASCII. | 198 // ASCII. |
| 295 icu::UnicodeSet cyrillic_in_label; | 199 icu::UnicodeSet cyrillic_in_label; |
| 296 icu::StringCharacterIterator it(label); | 200 icu::StringCharacterIterator it(label); |
| 297 for (it.setToStart(); it.hasNext();) { | 201 for (it.setToStart(); it.hasNext();) { |
| 298 const UChar32 c = it.next32PostInc(); | 202 const UChar32 c = it.next32PostInc(); |
| 299 if (cyrillic_letters_.contains(c)) | 203 if (cyrillic_letters_.contains(c)) |
| 300 cyrillic_in_label.add(c); | 204 cyrillic_in_label.add(c); |
| 301 } | 205 } |
| 302 return !cyrillic_in_label.isEmpty() && | 206 return !cyrillic_in_label.isEmpty() && |
| (...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 378 allowed_set.remove(0x0F8Cu); | 282 allowed_set.remove(0x0F8Cu); |
| 379 allowed_set.remove(0x0F8Du); | 283 allowed_set.remove(0x0F8Du); |
| 380 allowed_set.remove(0x0F8Eu); | 284 allowed_set.remove(0x0F8Eu); |
| 381 allowed_set.remove(0x0F8Fu); | 285 allowed_set.remove(0x0F8Fu); |
| 382 #endif | 286 #endif |
| 383 | 287 |
| 384 uspoof_setAllowedUnicodeSet(checker_, &allowed_set, status); | 288 uspoof_setAllowedUnicodeSet(checker_, &allowed_set, status); |
| 385 } | 289 } |
| 386 | 290 |
| 387 } // namespace url_formatter | 291 } // namespace url_formatter |
| OLD | NEW |