Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(148)

Side by Side Diff: net/base/net_util.cc

Issue 3011012: Add URL filter to trigger punycode for Japanese homographic sequences.... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: Created 10 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | net/base/net_util_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "net/base/net_util.h" 5 #include "net/base/net_util.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <map> 8 #include <map>
9 #include <unicode/regex.h>
9 #include <unicode/ucnv.h> 10 #include <unicode/ucnv.h>
10 #include <unicode/uidna.h> 11 #include <unicode/uidna.h>
11 #include <unicode/ulocdata.h> 12 #include <unicode/ulocdata.h>
12 #include <unicode/uniset.h> 13 #include <unicode/uniset.h>
13 #include <unicode/uscript.h> 14 #include <unicode/uscript.h>
14 #include <unicode/uset.h> 15 #include <unicode/uset.h>
15 16
16 #include "build/build_config.h" 17 #include "build/build_config.h"
17 18
18 #if defined(OS_WIN) 19 #if defined(OS_WIN)
(...skipping 595 matching lines...) Expand 10 before | Expand all | Expand 10 after
614 #ifdef U_WCHAR_IS_UTF16 615 #ifdef U_WCHAR_IS_UTF16
615 icu::UnicodeSet dangerous_characters(icu::UnicodeString( 616 icu::UnicodeSet dangerous_characters(icu::UnicodeString(
616 L"[[\\ \u00bc\u00bd\u01c3\u0337\u0338" 617 L"[[\\ \u00bc\u00bd\u01c3\u0337\u0338"
617 L"\u05c3\u05f4\u06d4\u0702\u115f\u1160][\u2000-\u200b]" 618 L"\u05c3\u05f4\u06d4\u0702\u115f\u1160][\u2000-\u200b]"
618 L"[\u2024\u2027\u2028\u2029\u2039\u203a\u2044\u205f]" 619 L"[\u2024\u2027\u2028\u2029\u2039\u203a\u2044\u205f]"
619 L"[\u2154-\u2156][\u2159-\u215b][\u215f\u2215\u23ae" 620 L"[\u2154-\u2156][\u2159-\u215b][\u215f\u2215\u23ae"
620 L"\u29f6\u29f8\u2afb\u2afd][\u2ff0-\u2ffb][\u3014" 621 L"\u29f6\u29f8\u2afb\u2afd][\u2ff0-\u2ffb][\u3014"
621 L"\u3015\u3033\u3164\u321d\u321e\u33ae\u33af\u33c6\u33df\ufe14" 622 L"\u3015\u3033\u3164\u321d\u321e\u33ae\u33af\u33c6\u33df\ufe14"
622 L"\ufe15\ufe3f\ufe5d\ufe5e\ufeff\uff0e\uff06\uff61\uffa0\ufff9]" 623 L"\ufe15\ufe3f\ufe5d\ufe5e\ufeff\uff0e\uff06\uff61\uffa0\ufff9]"
623 L"[\ufffa-\ufffd]]"), status); 624 L"[\ufffa-\ufffd]]"), status);
625 DCHECK(U_SUCCESS(status));
626 icu::RegexMatcher dangerous_patterns(icu::UnicodeString(
627 // Lone katakana no, so, or n
628 L"([^\\p{Katakana}][\u30ce\u30f3\u30bd][^\\p{Katakana}]"
629 // Repeating Japanese accent characters
630 L"|[\u3099\u309a\u309b\u309c][\u3099\u309a\u309b\u309c])"),
631 0, status);
624 #else 632 #else
625 icu::UnicodeSet dangerous_characters(icu::UnicodeString( 633 icu::UnicodeSet dangerous_characters(icu::UnicodeString(
626 "[[\\u0020\\u00bc\\u00bd\\u01c3\\u0337\\u0338" 634 "[[\\u0020\\u00bc\\u00bd\\u01c3\\u0337\\u0338"
627 "\\u05c3\\u05f4\\u06d4\\u0702\\u115f\\u1160][\\u2000-\\u200b]" 635 "\\u05c3\\u05f4\\u06d4\\u0702\\u115f\\u1160][\\u2000-\\u200b]"
628 "[\\u2024\\u2027\\u2028\\u2029\\u2039\\u203a\\u2044\\u205f]" 636 "[\\u2024\\u2027\\u2028\\u2029\\u2039\\u203a\\u2044\\u205f]"
629 "[\\u2154-\\u2156][\\u2159-\\u215b][\\u215f\\u2215\\u23ae" 637 "[\\u2154-\\u2156][\\u2159-\\u215b][\\u215f\\u2215\\u23ae"
630 "\\u29f6\\u29f8\\u2afb\\u2afd][\\u2ff0-\\u2ffb][\\u3014" 638 "\\u29f6\\u29f8\\u2afb\\u2afd][\\u2ff0-\\u2ffb][\\u3014"
631 "\\u3015\\u3033\\u3164\\u321d\\u321e\\u33ae\\u33af\\u33c6\\u33df\\ufe14" 639 "\\u3015\\u3033\\u3164\\u321d\\u321e\\u33ae\\u33af\\u33c6\\u33df\\ufe14"
632 "\\ufe15\\ufe3f\\ufe5d\\ufe5e\\ufeff\\uff0e\\uff06\\uff61\\uffa0\\ufff9]" 640 "\\ufe15\\ufe3f\\ufe5d\\ufe5e\\ufeff\\uff0e\\uff06\\uff61\\uffa0\\ufff9]"
633 "[\\ufffa-\\ufffd]]", -1, US_INV), status); 641 "[\\ufffa-\\ufffd]]", -1, US_INV), status);
642 DCHECK(U_SUCCESS(status));
643 icu::RegexMatcher dangerous_patterns(icu::UnicodeString(
644 // Lone katakana no, so, or n
645 "([^\\p{Katakana}][\\u30ce\\u30f3\\u30bd][^\\p{Katakana}]"
646 // Repeating Japanese accent characters
647 "|[\\u3099\\u309a\\u309b\\u309c][\\u3099\\u309a\\u309b\\u309c])"),
648 0, status);
634 #endif 649 #endif
635 DCHECK(U_SUCCESS(status)); 650 DCHECK(U_SUCCESS(status));
636 icu::UnicodeSet component_characters; 651 icu::UnicodeSet component_characters;
637 component_characters.addAll(icu::UnicodeString(str, str_len)); 652 icu::UnicodeString component_string(str, str_len);
653 component_characters.addAll(component_string);
638 if (dangerous_characters.containsSome(component_characters)) 654 if (dangerous_characters.containsSome(component_characters))
639 return false; 655 return false;
640 656
657 DCHECK(U_SUCCESS(status));
658 dangerous_patterns.reset(component_string);
659 if (dangerous_patterns.find())
660 return false;
661
641 // If the language list is empty, the result is completely determined 662 // If the language list is empty, the result is completely determined
642 // by whether a component is a single script or not. This will block 663 // by whether a component is a single script or not. This will block
643 // even "safe" script mixing cases like <Chinese, Latin-ASCII> that are 664 // even "safe" script mixing cases like <Chinese, Latin-ASCII> that are
644 // allowed with |languages| (while it blocks Chinese + Latin letters with 665 // allowed with |languages| (while it blocks Chinese + Latin letters with
645 // an accent as should be the case), but we want to err on the safe side 666 // an accent as should be the case), but we want to err on the safe side
646 // when |languages| is empty. 667 // when |languages| is empty.
647 if (languages.empty()) 668 if (languages.empty())
648 return IsIDNComponentInSingleScript(str, str_len); 669 return IsIDNComponentInSingleScript(str, str_len);
649 670
650 // |common_characters| is made up of ASCII numbers, hyphen, plus and 671 // |common_characters| is made up of ASCII numbers, hyphen, plus and
(...skipping 1265 matching lines...) Expand 10 before | Expand all | Expand 10 after
1916 } 1937 }
1917 1938
1918 int GetPortFromAddrinfo(const struct addrinfo* info) { 1939 int GetPortFromAddrinfo(const struct addrinfo* info) {
1919 uint16* port_field = GetPortFieldFromAddrinfo(info); 1940 uint16* port_field = GetPortFieldFromAddrinfo(info);
1920 if (!port_field) 1941 if (!port_field)
1921 return -1; 1942 return -1;
1922 return ntohs(*port_field); 1943 return ntohs(*port_field);
1923 } 1944 }
1924 1945
1925 } // namespace net 1946 } // namespace net
OLDNEW
« no previous file with comments | « no previous file | net/base/net_util_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698