OLD | NEW |
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "net/base/net_util.h" | 5 #include "net/base/net_util.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <map> | 8 #include <map> |
| 9 #include <unicode/regex.h> |
9 #include <unicode/ucnv.h> | 10 #include <unicode/ucnv.h> |
10 #include <unicode/uidna.h> | 11 #include <unicode/uidna.h> |
11 #include <unicode/ulocdata.h> | 12 #include <unicode/ulocdata.h> |
12 #include <unicode/uniset.h> | 13 #include <unicode/uniset.h> |
13 #include <unicode/uscript.h> | 14 #include <unicode/uscript.h> |
14 #include <unicode/uset.h> | 15 #include <unicode/uset.h> |
15 | 16 |
16 #include "build/build_config.h" | 17 #include "build/build_config.h" |
17 | 18 |
18 #if defined(OS_WIN) | 19 #if defined(OS_WIN) |
(...skipping 595 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
614 #ifdef U_WCHAR_IS_UTF16 | 615 #ifdef U_WCHAR_IS_UTF16 |
615 icu::UnicodeSet dangerous_characters(icu::UnicodeString( | 616 icu::UnicodeSet dangerous_characters(icu::UnicodeString( |
616 L"[[\\ \u00bc\u00bd\u01c3\u0337\u0338" | 617 L"[[\\ \u00bc\u00bd\u01c3\u0337\u0338" |
617 L"\u05c3\u05f4\u06d4\u0702\u115f\u1160][\u2000-\u200b]" | 618 L"\u05c3\u05f4\u06d4\u0702\u115f\u1160][\u2000-\u200b]" |
618 L"[\u2024\u2027\u2028\u2029\u2039\u203a\u2044\u205f]" | 619 L"[\u2024\u2027\u2028\u2029\u2039\u203a\u2044\u205f]" |
619 L"[\u2154-\u2156][\u2159-\u215b][\u215f\u2215\u23ae" | 620 L"[\u2154-\u2156][\u2159-\u215b][\u215f\u2215\u23ae" |
620 L"\u29f6\u29f8\u2afb\u2afd][\u2ff0-\u2ffb][\u3014" | 621 L"\u29f6\u29f8\u2afb\u2afd][\u2ff0-\u2ffb][\u3014" |
621 L"\u3015\u3033\u3164\u321d\u321e\u33ae\u33af\u33c6\u33df\ufe14" | 622 L"\u3015\u3033\u3164\u321d\u321e\u33ae\u33af\u33c6\u33df\ufe14" |
622 L"\ufe15\ufe3f\ufe5d\ufe5e\ufeff\uff0e\uff06\uff61\uffa0\ufff9]" | 623 L"\ufe15\ufe3f\ufe5d\ufe5e\ufeff\uff0e\uff06\uff61\uffa0\ufff9]" |
623 L"[\ufffa-\ufffd]]"), status); | 624 L"[\ufffa-\ufffd]]"), status); |
| 625 DCHECK(U_SUCCESS(status)); |
| 626 icu::RegexMatcher dangerous_patterns(icu::UnicodeString( |
| 627 // Lone katakana no, so, or n |
| 628 L"([^\\p{Katakana}][\u30ce\u30f3\u30bd][^\\p{Katakana}]" |
| 629 // Repeating Japanese accent characters |
| 630 L"|[\u3099\u309a\u309b\u309c][\u3099\u309a\u309b\u309c])"), |
| 631 0, status); |
624 #else | 632 #else |
625 icu::UnicodeSet dangerous_characters(icu::UnicodeString( | 633 icu::UnicodeSet dangerous_characters(icu::UnicodeString( |
626 "[[\\u0020\\u00bc\\u00bd\\u01c3\\u0337\\u0338" | 634 "[[\\u0020\\u00bc\\u00bd\\u01c3\\u0337\\u0338" |
627 "\\u05c3\\u05f4\\u06d4\\u0702\\u115f\\u1160][\\u2000-\\u200b]" | 635 "\\u05c3\\u05f4\\u06d4\\u0702\\u115f\\u1160][\\u2000-\\u200b]" |
628 "[\\u2024\\u2027\\u2028\\u2029\\u2039\\u203a\\u2044\\u205f]" | 636 "[\\u2024\\u2027\\u2028\\u2029\\u2039\\u203a\\u2044\\u205f]" |
629 "[\\u2154-\\u2156][\\u2159-\\u215b][\\u215f\\u2215\\u23ae" | 637 "[\\u2154-\\u2156][\\u2159-\\u215b][\\u215f\\u2215\\u23ae" |
630 "\\u29f6\\u29f8\\u2afb\\u2afd][\\u2ff0-\\u2ffb][\\u3014" | 638 "\\u29f6\\u29f8\\u2afb\\u2afd][\\u2ff0-\\u2ffb][\\u3014" |
631 "\\u3015\\u3033\\u3164\\u321d\\u321e\\u33ae\\u33af\\u33c6\\u33df\\ufe14" | 639 "\\u3015\\u3033\\u3164\\u321d\\u321e\\u33ae\\u33af\\u33c6\\u33df\\ufe14" |
632 "\\ufe15\\ufe3f\\ufe5d\\ufe5e\\ufeff\\uff0e\\uff06\\uff61\\uffa0\\ufff9]" | 640 "\\ufe15\\ufe3f\\ufe5d\\ufe5e\\ufeff\\uff0e\\uff06\\uff61\\uffa0\\ufff9]" |
633 "[\\ufffa-\\ufffd]]", -1, US_INV), status); | 641 "[\\ufffa-\\ufffd]]", -1, US_INV), status); |
| 642 DCHECK(U_SUCCESS(status)); |
| 643 icu::RegexMatcher dangerous_patterns(icu::UnicodeString( |
| 644 // Lone katakana no (U+30CE), n (U+30F3), or so (U+30BD); escapes are left for ICU to parse |
| 645 "([^\\p{Katakana}][\\u30ce\\u30f3\\u30bd][^\\p{Katakana}]" |
| 646 // Two consecutive Japanese voicing marks (U+3099 through U+309C) |
| 647 "|[\\u3099\\u309a\\u309b\\u309c][\\u3099\\u309a\\u309b\\u309c])"), |
| 648 0, status); |
634 #endif | 649 #endif |
635 DCHECK(U_SUCCESS(status)); | 650 DCHECK(U_SUCCESS(status)); |
636 icu::UnicodeSet component_characters; | 651 icu::UnicodeSet component_characters; |
637 component_characters.addAll(icu::UnicodeString(str, str_len)); | 652 icu::UnicodeString component_string(str, str_len); |
| 653 component_characters.addAll(component_string); |
638 if (dangerous_characters.containsSome(component_characters)) | 654 if (dangerous_characters.containsSome(component_characters)) |
639 return false; | 655 return false; |
640 | 656 |
| 657 DCHECK(U_SUCCESS(status)); |
| 658 dangerous_patterns.reset(component_string); |
| 659 if (dangerous_patterns.find()) |
| 660 return false; |
| 661 |
641 // If the language list is empty, the result is completely determined | 662 // If the language list is empty, the result is completely determined |
642 // by whether a component is a single script or not. This will block | 663 // by whether a component is a single script or not. This will block |
643 // even "safe" script mixing cases like <Chinese, Latin-ASCII> that are | 664 // even "safe" script mixing cases like <Chinese, Latin-ASCII> that are |
644 // allowed with |languages| (while it blocks Chinese + Latin letters with | 665 // allowed with |languages| (while it blocks Chinese + Latin letters with |
645 // an accent as should be the case), but we want to err on the safe side | 666 // an accent as should be the case), but we want to err on the safe side |
646 // when |languages| is empty. | 667 // when |languages| is empty. |
647 if (languages.empty()) | 668 if (languages.empty()) |
648 return IsIDNComponentInSingleScript(str, str_len); | 669 return IsIDNComponentInSingleScript(str, str_len); |
649 | 670 |
650 // |common_characters| is made up of ASCII numbers, hyphen, plus and | 671 // |common_characters| is made up of ASCII numbers, hyphen, plus and |
(...skipping 1265 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1916 } | 1937 } |
1917 | 1938 |
1918 int GetPortFromAddrinfo(const struct addrinfo* info) { | 1939 int GetPortFromAddrinfo(const struct addrinfo* info) { |
1919 uint16* port_field = GetPortFieldFromAddrinfo(info); | 1940 uint16* port_field = GetPortFieldFromAddrinfo(info); |
1920 if (!port_field) | 1941 if (!port_field) |
1921 return -1; | 1942 return -1; |
1922 return ntohs(*port_field); | 1943 return ntohs(*port_field); |
1923 } | 1944 } |
1924 | 1945 |
1925 } // namespace net | 1946 } // namespace net |
OLD | NEW |