| OLD | NEW |
| 1 // Copyright 2015 The Chromium Authors. All rights reserved. | 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "components/url_formatter/url_formatter.h" | 5 #include "components/url_formatter/url_formatter.h" |
| 6 | 6 |
| 7 #include <algorithm> | 7 #include <algorithm> |
| 8 #include <utility> | 8 #include <utility> |
| 9 | 9 |
| 10 #include "base/lazy_instance.h" | 10 #include "base/lazy_instance.h" |
| (...skipping 397 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 408 // - Disallow U+30FB (Katakana Middle Dot) and U+30FC (Hiragana-Katakana | 408 // - Disallow U+30FB (Katakana Middle Dot) and U+30FC (Hiragana-Katakana |
| 409 // Prolonged Sound) used out-of-context. | 409 // Prolonged Sound) used out-of-context. |
| 410 // - Disallow U+30FD/E (Katakana iteration mark/voiced iteration mark) | 410 // - Disallow U+30FD/E (Katakana iteration mark/voiced iteration mark) |
| 411 // unless they're preceded by a Katakana. | 411 // unless they're preceded by a Katakana. |
| 412 // - Disallow three Hiragana letters (U+307[8-A]) or Katakana letters | 412 // - Disallow three Hiragana letters (U+307[8-A]) or Katakana letters |
| 413 // (U+30D[8-A]) that look exactly like each other when they're used in a | 413 // (U+30D[8-A]) that look exactly like each other when they're used in a |
| 414 // label otherwise entirely in Katakana or Hiragana. | 414 // label otherwise entirely in Katakana or Hiragana. |
| 415 // - Disallow U+0585 (Armenian Small Letter Oh) and U+0581 (Armenian Small | 415 // - Disallow U+0585 (Armenian Small Letter Oh) and U+0581 (Armenian Small |
| 416 // Letter Co) to be next to Latin. | 416 // Letter Co) to be next to Latin. |
| 417 // - Disallow Latin 'o' and 'g' next to Armenian. | 417 // - Disallow Latin 'o' and 'g' next to Armenian. |
| 418 // - Disallow mixing of Latin and Canadian Syllabary. |
| 418 dangerous_pattern = new icu::RegexMatcher( | 419 dangerous_pattern = new icu::RegexMatcher( |
| 419 icu::UnicodeString( | 420 icu::UnicodeString( |
| 420 "[^\\p{scx=kana}\\p{scx=hira}\\p{scx=hani}]" | 421 "[^\\p{scx=kana}\\p{scx=hira}\\p{scx=hani}]" |
| 421 "[\\u30ce\\u30f3\\u30bd\\u30be]" | 422 "[\\u30ce\\u30f3\\u30bd\\u30be]" |
| 422 "[^\\p{scx=kana}\\p{scx=hira}\\p{scx=hani}]|" | 423 "[^\\p{scx=kana}\\p{scx=hira}\\p{scx=hani}]|" |
| 423 "[^\\p{scx=kana}\\p{scx=hira}]\\u30fc|^\\u30fc|" | 424 "[^\\p{scx=kana}\\p{scx=hira}]\\u30fc|^\\u30fc|" |
| 424 "[^\\p{scx=kana}][\\u30fd\\u30fe]|^[\\u30fd\\u30fe]|" | 425 "[^\\p{scx=kana}][\\u30fd\\u30fe]|^[\\u30fd\\u30fe]|" |
| 425 "^[\\p{scx=kana}]+[\\u3078-\\u307a][\\p{scx=kana}]+$|" | 426 "^[\\p{scx=kana}]+[\\u3078-\\u307a][\\p{scx=kana}]+$|" |
| 426 "^[\\p{scx=hira}]+[\\u30d8-\\u30da][\\p{scx=hira}]+$|" | 427 "^[\\p{scx=hira}]+[\\u30d8-\\u30da][\\p{scx=hira}]+$|" |
| 427 "[a-z]\\u30fb|\\u30fb[a-z]|" | 428 "[a-z]\\u30fb|\\u30fb[a-z]|" |
| 428 "^[\\u0585\\u0581]+[a-z]|[a-z][\\u0585\\u0581]+$|" | 429 "^[\\u0585\\u0581]+[a-z]|[a-z][\\u0585\\u0581]+$|" |
| 429 "[a-z][\\u0585\\u0581]+[a-z]|" | 430 "[a-z][\\u0585\\u0581]+[a-z]|" |
| 430 "^[og]+[\\p{scx=armn}]|[\\p{scx=armn}][og]+$|" | 431 "^[og]+[\\p{scx=armn}]|[\\p{scx=armn}][og]+$|" |
| 431 "[\\p{scx=armn}][og]+[\\p{scx=armn}]", | 432 "[\\p{scx=armn}][og]+[\\p{scx=armn}]|" |
| 433 "[\\p{sc=cans}].*[a-z]|[a-z].*[\\p{sc=cans}]", |
| 432 -1, US_INV), | 434 -1, US_INV), |
| 433 0, status); | 435 0, status); |
| 434 tls_index.Set(dangerous_pattern); | 436 tls_index.Set(dangerous_pattern); |
| 435 } | 437 } |
| 436 dangerous_pattern->reset(label_string); | 438 dangerous_pattern->reset(label_string); |
| 437 return !dangerous_pattern->find(); | 439 return !dangerous_pattern->find(); |
| 438 } | 440 } |
| 439 | 441 |
| 440 bool IDNSpoofChecker::IsMadeOfLatinAlikeCyrillic( | 442 bool IDNSpoofChecker::IsMadeOfLatinAlikeCyrillic( |
| 441 const icu::UnicodeString& label_string) { | 443 const icu::UnicodeString& label_string) { |
| (...skipping 408 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 850 return base::StartsWith(text, www, base::CompareCase::SENSITIVE) | 852 return base::StartsWith(text, www, base::CompareCase::SENSITIVE) |
| 851 ? text.substr(www.length()) : text; | 853 ? text.substr(www.length()) : text; |
| 852 } | 854 } |
| 853 | 855 |
| 854 base::string16 StripWWWFromHost(const GURL& url) { | 856 base::string16 StripWWWFromHost(const GURL& url) { |
| 855 DCHECK(url.is_valid()); | 857 DCHECK(url.is_valid()); |
| 856 return StripWWW(base::ASCIIToUTF16(url.host_piece())); | 858 return StripWWW(base::ASCIIToUTF16(url.host_piece())); |
| 857 } | 859 } |
| 858 | 860 |
| 859 } // namespace url_formatter | 861 } // namespace url_formatter |
| OLD | NEW |