Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(277)

Side by Side Diff: components/url_formatter/url_formatter.cc

Issue 2871643005: Disallow mixing of Canadian Syllabary and [a-z] (Closed)
Patch Set: fix unittests Created 3 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | components/url_formatter/url_formatter_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2015 The Chromium Authors. All rights reserved. 1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/url_formatter/url_formatter.h" 5 #include "components/url_formatter/url_formatter.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <utility> 8 #include <utility>
9 9
10 #include "base/lazy_instance.h" 10 #include "base/lazy_instance.h"
(...skipping 397 matching lines...) Expand 10 before | Expand all | Expand 10 after
408 // - Disallow U+30FB (Katakana Middle Dot) and U+30FC (Hiragana-Katakana 408 // - Disallow U+30FB (Katakana Middle Dot) and U+30FC (Hiragana-Katakana
409 // Prolonged Sound) used out-of-context. 409 // Prolonged Sound) used out-of-context.
410 // - Disallow U+30FD/E (Katakana iteration mark/voiced iteration mark) 410 // - Disallow U+30FD/E (Katakana iteration mark/voiced iteration mark)
411 // unless they're preceded by a Katakana. 411 // unless they're preceded by a Katakana.
412 // - Disallow three Hiragana letters (U+307[8-A]) or Katakana letters 412 // - Disallow three Hiragana letters (U+307[8-A]) or Katakana letters
413 // (U+30D[8-A]) that look exactly like each other when they're used in a 413 // (U+30D[8-A]) that look exactly like each other when they're used in a
414 // label otherwise entirely in Katakana or Hiragana. 414 // label otherwise entirely in Katakana or Hiragana.
415 // - Disallow U+0585 (Armenian Small Letter Oh) and U+0581 (Armenian Small 415 // - Disallow U+0585 (Armenian Small Letter Oh) and U+0581 (Armenian Small
416 // Letter Co) to be next to Latin. 416 // Letter Co) to be next to Latin.
417 // - Disallow Latin 'o' and 'g' next to Armenian. 417 // - Disallow Latin 'o' and 'g' next to Armenian.
418 // - Disallow mixing of Latin and Canadian Syllabary.
418 dangerous_pattern = new icu::RegexMatcher( 419 dangerous_pattern = new icu::RegexMatcher(
419 icu::UnicodeString( 420 icu::UnicodeString(
420 "[^\\p{scx=kana}\\p{scx=hira}\\p{scx=hani}]" 421 "[^\\p{scx=kana}\\p{scx=hira}\\p{scx=hani}]"
421 "[\\u30ce\\u30f3\\u30bd\\u30be]" 422 "[\\u30ce\\u30f3\\u30bd\\u30be]"
422 "[^\\p{scx=kana}\\p{scx=hira}\\p{scx=hani}]|" 423 "[^\\p{scx=kana}\\p{scx=hira}\\p{scx=hani}]|"
423 "[^\\p{scx=kana}\\p{scx=hira}]\\u30fc|^\\u30fc|" 424 "[^\\p{scx=kana}\\p{scx=hira}]\\u30fc|^\\u30fc|"
424 "[^\\p{scx=kana}][\\u30fd\\u30fe]|^[\\u30fd\\u30fe]|" 425 "[^\\p{scx=kana}][\\u30fd\\u30fe]|^[\\u30fd\\u30fe]|"
425 "^[\\p{scx=kana}]+[\\u3078-\\u307a][\\p{scx=kana}]+$|" 426 "^[\\p{scx=kana}]+[\\u3078-\\u307a][\\p{scx=kana}]+$|"
426 "^[\\p{scx=hira}]+[\\u30d8-\\u30da][\\p{scx=hira}]+$|" 427 "^[\\p{scx=hira}]+[\\u30d8-\\u30da][\\p{scx=hira}]+$|"
427 "[a-z]\\u30fb|\\u30fb[a-z]|" 428 "[a-z]\\u30fb|\\u30fb[a-z]|"
428 "^[\\u0585\\u0581]+[a-z]|[a-z][\\u0585\\u0581]+$|" 429 "^[\\u0585\\u0581]+[a-z]|[a-z][\\u0585\\u0581]+$|"
429 "[a-z][\\u0585\\u0581]+[a-z]|" 430 "[a-z][\\u0585\\u0581]+[a-z]|"
430 "^[og]+[\\p{scx=armn}]|[\\p{scx=armn}][og]+$|" 431 "^[og]+[\\p{scx=armn}]|[\\p{scx=armn}][og]+$|"
431 "[\\p{scx=armn}][og]+[\\p{scx=armn}]", 432 "[\\p{scx=armn}][og]+[\\p{scx=armn}]|"
433 "[\\p{sc=cans}].*[a-z]|[a-z].*[\\p{sc=cans}]",
432 -1, US_INV), 434 -1, US_INV),
433 0, status); 435 0, status);
434 tls_index.Set(dangerous_pattern); 436 tls_index.Set(dangerous_pattern);
435 } 437 }
436 dangerous_pattern->reset(label_string); 438 dangerous_pattern->reset(label_string);
437 return !dangerous_pattern->find(); 439 return !dangerous_pattern->find();
438 } 440 }
439 441
440 bool IDNSpoofChecker::IsMadeOfLatinAlikeCyrillic( 442 bool IDNSpoofChecker::IsMadeOfLatinAlikeCyrillic(
441 const icu::UnicodeString& label_string) { 443 const icu::UnicodeString& label_string) {
(...skipping 408 matching lines...) Expand 10 before | Expand all | Expand 10 after
850 return base::StartsWith(text, www, base::CompareCase::SENSITIVE) 852 return base::StartsWith(text, www, base::CompareCase::SENSITIVE)
851 ? text.substr(www.length()) : text; 853 ? text.substr(www.length()) : text;
852 } 854 }
853 855
854 base::string16 StripWWWFromHost(const GURL& url) { 856 base::string16 StripWWWFromHost(const GURL& url) {
855 DCHECK(url.is_valid()); 857 DCHECK(url.is_valid());
856 return StripWWW(base::ASCIIToUTF16(url.host_piece())); 858 return StripWWW(base::ASCIIToUTF16(url.host_piece()));
857 } 859 }
858 860
859 } // namespace url_formatter 861 } // namespace url_formatter
OLDNEW
« no previous file with comments | « no previous file | components/url_formatter/url_formatter_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698