OLD | NEW |
1 // Copyright 2015 The Chromium Authors. All rights reserved. | 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "components/url_formatter/url_formatter.h" | 5 #include "components/url_formatter/url_formatter.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <utility> | 8 #include <utility> |
9 | 9 |
10 #include "base/lazy_instance.h" | 10 #include "base/lazy_instance.h" |
(...skipping 397 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
408 // - Disallow U+30FB (Katakana Middle Dot) and U+30FC (Katakana-Hiragana | 408 // - Disallow U+30FB (Katakana Middle Dot) and U+30FC (Katakana-Hiragana |
409 // Prolonged Sound Mark) used out-of-context. | 409 // Prolonged Sound Mark) used out-of-context. |
410 // - Disallow U+30FD/E (Katakana iteration mark/voiced iteration mark) | 410 // - Disallow U+30FD/E (Katakana iteration mark/voiced iteration mark) |
411 // unless they're preceded by a Katakana. | 411 // unless they're preceded by a Katakana. |
412 // - Disallow three Hiragana letters (U+307[8-A]) or Katakana letters | 412 // - Disallow three Hiragana letters (U+307[8-A]) or Katakana letters |
413 // (U+30D[8-A]) that look exactly like each other when they're used in a | 413 // (U+30D[8-A]) that look exactly like each other when they're used in a |
414 // label otherwise entirely in Katakana or Hiragana. | 414 // label otherwise entirely in Katakana or Hiragana. |
415 // - Disallow U+0585 (Armenian Small Letter Oh) and U+0581 (Armenian Small | 415 // - Disallow U+0585 (Armenian Small Letter Oh) and U+0581 (Armenian Small |
416 // Letter Co) to be next to Latin. | 416 // Letter Co) to be next to Latin. |
417 // - Disallow Latin 'o' and 'g' next to Armenian. | 417 // - Disallow Latin 'o' and 'g' next to Armenian. |
| 418 // - Disallow mixing of Latin and Canadian Syllabary. |
418 dangerous_pattern = new icu::RegexMatcher( | 419 dangerous_pattern = new icu::RegexMatcher( |
419 icu::UnicodeString( | 420 icu::UnicodeString( |
420 "[^\\p{scx=kana}\\p{scx=hira}\\p{scx=hani}]" | 421 "[^\\p{scx=kana}\\p{scx=hira}\\p{scx=hani}]" |
421 "[\\u30ce\\u30f3\\u30bd\\u30be]" | 422 "[\\u30ce\\u30f3\\u30bd\\u30be]" |
422 "[^\\p{scx=kana}\\p{scx=hira}\\p{scx=hani}]|" | 423 "[^\\p{scx=kana}\\p{scx=hira}\\p{scx=hani}]|" |
423 "[^\\p{scx=kana}\\p{scx=hira}]\\u30fc|^\\u30fc|" | 424 "[^\\p{scx=kana}\\p{scx=hira}]\\u30fc|^\\u30fc|" |
424 "[^\\p{scx=kana}][\\u30fd\\u30fe]|^[\\u30fd\\u30fe]|" | 425 "[^\\p{scx=kana}][\\u30fd\\u30fe]|^[\\u30fd\\u30fe]|" |
425 "^[\\p{scx=kana}]+[\\u3078-\\u307a][\\p{scx=kana}]+$|" | 426 "^[\\p{scx=kana}]+[\\u3078-\\u307a][\\p{scx=kana}]+$|" |
426 "^[\\p{scx=hira}]+[\\u30d8-\\u30da][\\p{scx=hira}]+$|" | 427 "^[\\p{scx=hira}]+[\\u30d8-\\u30da][\\p{scx=hira}]+$|" |
427 "[a-z]\\u30fb|\\u30fb[a-z]|" | 428 "[a-z]\\u30fb|\\u30fb[a-z]|" |
428 "^[\\u0585\\u0581]+[a-z]|[a-z][\\u0585\\u0581]+$|" | 429 "^[\\u0585\\u0581]+[a-z]|[a-z][\\u0585\\u0581]+$|" |
429 "[a-z][\\u0585\\u0581]+[a-z]|" | 430 "[a-z][\\u0585\\u0581]+[a-z]|" |
430 "^[og]+[\\p{scx=armn}]|[\\p{scx=armn}][og]+$|" | 431 "^[og]+[\\p{scx=armn}]|[\\p{scx=armn}][og]+$|" |
431 "[\\p{scx=armn}][og]+[\\p{scx=armn}]", | 432 "[\\p{scx=armn}][og]+[\\p{scx=armn}]|" |
| 433 "[\\p{sc=cans}].*[a-z]|[a-z].*[\\p{sc=cans}]", |
432 -1, US_INV), | 434 -1, US_INV), |
433 0, status); | 435 0, status); |
434 tls_index.Set(dangerous_pattern); | 436 tls_index.Set(dangerous_pattern); |
435 } | 437 } |
436 dangerous_pattern->reset(label_string); | 438 dangerous_pattern->reset(label_string); |
437 return !dangerous_pattern->find(); | 439 return !dangerous_pattern->find(); |
438 } | 440 } |
439 | 441 |
440 bool IDNSpoofChecker::IsMadeOfLatinAlikeCyrillic( | 442 bool IDNSpoofChecker::IsMadeOfLatinAlikeCyrillic( |
441 const icu::UnicodeString& label_string) { | 443 const icu::UnicodeString& label_string) { |
(...skipping 408 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
850 return base::StartsWith(text, www, base::CompareCase::SENSITIVE) | 852 return base::StartsWith(text, www, base::CompareCase::SENSITIVE) |
851 ? text.substr(www.length()) : text; | 853 ? text.substr(www.length()) : text; |
852 } | 854 } |
853 | 855 |
854 base::string16 StripWWWFromHost(const GURL& url) { | 856 base::string16 StripWWWFromHost(const GURL& url) { |
855 DCHECK(url.is_valid()); | 857 DCHECK(url.is_valid()); |
856 return StripWWW(base::ASCIIToUTF16(url.host_piece())); | 858 return StripWWW(base::ASCIIToUTF16(url.host_piece())); |
857 } | 859 } |
858 | 860 |
859 } // namespace url_formatter | 861 } // namespace url_formatter |
OLD | NEW |