| OLD | NEW | 
|---|---|
| 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. | 
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be | 
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. | 
| 4 | 4 | 
| 5 #include "net/base/net_util.h" | 5 #include "net/base/net_util.h" | 
| 6 | 6 | 
| 7 #include <algorithm> | 7 #include <algorithm> | 
| 8 #include <map> | 8 #include <map> | 
|  | 9 #include <unicode/regex.h> | 
| 9 #include <unicode/ucnv.h> | 10 #include <unicode/ucnv.h> | 
| 10 #include <unicode/uidna.h> | 11 #include <unicode/uidna.h> | 
| 11 #include <unicode/ulocdata.h> | 12 #include <unicode/ulocdata.h> | 
| 12 #include <unicode/uniset.h> | 13 #include <unicode/uniset.h> | 
| 13 #include <unicode/uscript.h> | 14 #include <unicode/uscript.h> | 
| 14 #include <unicode/uset.h> | 15 #include <unicode/uset.h> | 
| 15 | 16 | 
| 16 #include "build/build_config.h" | 17 #include "build/build_config.h" | 
| 17 | 18 | 
| 18 #if defined(OS_WIN) | 19 #if defined(OS_WIN) | 
| (...skipping 595 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 614 #ifdef U_WCHAR_IS_UTF16 | 615 #ifdef U_WCHAR_IS_UTF16 | 
| 615   icu::UnicodeSet dangerous_characters(icu::UnicodeString( | 616   icu::UnicodeSet dangerous_characters(icu::UnicodeString( | 
| 616       L"[[\\ \u00bc\u00bd\u01c3\u0337\u0338" | 617       L"[[\\ \u00bc\u00bd\u01c3\u0337\u0338" | 
| 617       L"\u05c3\u05f4\u06d4\u0702\u115f\u1160][\u2000-\u200b]" | 618       L"\u05c3\u05f4\u06d4\u0702\u115f\u1160][\u2000-\u200b]" | 
| 618       L"[\u2024\u2027\u2028\u2029\u2039\u203a\u2044\u205f]" | 619       L"[\u2024\u2027\u2028\u2029\u2039\u203a\u2044\u205f]" | 
| 619       L"[\u2154-\u2156][\u2159-\u215b][\u215f\u2215\u23ae" | 620       L"[\u2154-\u2156][\u2159-\u215b][\u215f\u2215\u23ae" | 
| 620       L"\u29f6\u29f8\u2afb\u2afd][\u2ff0-\u2ffb][\u3014" | 621       L"\u29f6\u29f8\u2afb\u2afd][\u2ff0-\u2ffb][\u3014" | 
| 621       L"\u3015\u3033\u3164\u321d\u321e\u33ae\u33af\u33c6\u33df\ufe14" | 622       L"\u3015\u3033\u3164\u321d\u321e\u33ae\u33af\u33c6\u33df\ufe14" | 
| 622       L"\ufe15\ufe3f\ufe5d\ufe5e\ufeff\uff0e\uff06\uff61\uffa0\ufff9]" | 623       L"\ufe15\ufe3f\ufe5d\ufe5e\ufeff\uff0e\uff06\uff61\uffa0\ufff9]" | 
| 623       L"[\ufffa-\ufffd]]"), status); | 624       L"[\ufffa-\ufffd]]"), status); | 
|  | 625   DCHECK(U_SUCCESS(status)); | 
|  | 626   icu::RegexMatcher dangerous_patterns(icu::UnicodeString( | 
|  | 627       // Lone katakana no, so, or n | 
|  | 628       L"([^\\p{Katakana}][\u30ce\u30f3\u30bd][^\\p{Katakana}]" | 
|  | 629       // Repeating Japanese accent characters | 
|  | 630       L"|[\u3099\u309a\u309b\u309c][\u3099\u309a\u309b\u309c])"), | 
|  | 631       0, status); | 
| 624 #else | 632 #else | 
| 625   icu::UnicodeSet dangerous_characters(icu::UnicodeString( | 633   icu::UnicodeSet dangerous_characters(icu::UnicodeString( | 
| 626       "[[\\u0020\\u00bc\\u00bd\\u01c3\\u0337\\u0338" | 634       "[[\\u0020\\u00bc\\u00bd\\u01c3\\u0337\\u0338" | 
| 627       "\\u05c3\\u05f4\\u06d4\\u0702\\u115f\\u1160][\\u2000-\\u200b]" | 635       "\\u05c3\\u05f4\\u06d4\\u0702\\u115f\\u1160][\\u2000-\\u200b]" | 
| 628       "[\\u2024\\u2027\\u2028\\u2029\\u2039\\u203a\\u2044\\u205f]" | 636       "[\\u2024\\u2027\\u2028\\u2029\\u2039\\u203a\\u2044\\u205f]" | 
| 629       "[\\u2154-\\u2156][\\u2159-\\u215b][\\u215f\\u2215\\u23ae" | 637       "[\\u2154-\\u2156][\\u2159-\\u215b][\\u215f\\u2215\\u23ae" | 
| 630       "\\u29f6\\u29f8\\u2afb\\u2afd][\\u2ff0-\\u2ffb][\\u3014" | 638       "\\u29f6\\u29f8\\u2afb\\u2afd][\\u2ff0-\\u2ffb][\\u3014" | 
| 631       "\\u3015\\u3033\\u3164\\u321d\\u321e\\u33ae\\u33af\\u33c6\\u33df\\ufe14" | 639       "\\u3015\\u3033\\u3164\\u321d\\u321e\\u33ae\\u33af\\u33c6\\u33df\\ufe14" | 
| 632       "\\ufe15\\ufe3f\\ufe5d\\ufe5e\\ufeff\\uff0e\\uff06\\uff61\\uffa0\\ufff9]" | 640       "\\ufe15\\ufe3f\\ufe5d\\ufe5e\\ufeff\\uff0e\\uff06\\uff61\\uffa0\\ufff9]" | 
| 633       "[\\ufffa-\\ufffd]]", -1, US_INV), status); | 641       "[\\ufffa-\\ufffd]]", -1, US_INV), status); | 
|  | 642   DCHECK(U_SUCCESS(status)); | 
|  | 643   icu::RegexMatcher dangerous_patterns(icu::UnicodeString( | 
|  | 644       // Lone katakana no, so, or n | 
|  | 645       "([^\\p{Katakana}][\\u30ce\\u30f3\\u30bd][^\\p{Katakana}]" | 
|  | 646       // Repeating Japanese accent characters | 
|  | 647       "|[\\u3099\\u309a\\u309b\\u309c][\\u3099\\u309a\\u309b\\u309c])"), | 
|  | 648       0, status); | 
| 634 #endif | 649 #endif | 
| 635   DCHECK(U_SUCCESS(status)); | 650   DCHECK(U_SUCCESS(status)); | 
| 636   icu::UnicodeSet component_characters; | 651   icu::UnicodeSet component_characters; | 
| 637   component_characters.addAll(icu::UnicodeString(str, str_len)); | 652   icu::UnicodeString component_string(str, str_len); | 
|  | 653   component_characters.addAll(component_string); | 
| 638   if (dangerous_characters.containsSome(component_characters)) | 654   if (dangerous_characters.containsSome(component_characters)) | 
| 639     return false; | 655     return false; | 
| 640 | 656 | 
|  | 657   DCHECK(U_SUCCESS(status)); | 
|  | 658   dangerous_patterns.reset(component_string); | 
|  | 659   if (dangerous_patterns.find()) | 
|  | 660     return false; | 
|  | 661 | 
| 641   // If the language list is empty, the result is completely determined | 662   // If the language list is empty, the result is completely determined | 
| 642   // by whether a component is a single script or not. This will block | 663   // by whether a component is a single script or not. This will block | 
| 643   // even "safe" script mixing cases like <Chinese, Latin-ASCII> that are | 664   // even "safe" script mixing cases like <Chinese, Latin-ASCII> that are | 
| 644   // allowed with |languages| (while it blocks Chinese + Latin letters with | 665   // allowed with |languages| (while it blocks Chinese + Latin letters with | 
| 645   // an accent as should be the case), but we want to err on the safe side | 666   // an accent as should be the case), but we want to err on the safe side | 
| 646   // when |languages| is empty. | 667   // when |languages| is empty. | 
| 647   if (languages.empty()) | 668   if (languages.empty()) | 
| 648     return IsIDNComponentInSingleScript(str, str_len); | 669     return IsIDNComponentInSingleScript(str, str_len); | 
| 649 | 670 | 
| 650   // |common_characters| is made up of  ASCII numbers, hyphen, plus and | 671   // |common_characters| is made up of  ASCII numbers, hyphen, plus and | 
| (...skipping 1265 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 1916 } | 1937 } | 
| 1917 | 1938 | 
| 1918 int GetPortFromAddrinfo(const struct addrinfo* info) { | 1939 int GetPortFromAddrinfo(const struct addrinfo* info) { | 
| 1919   uint16* port_field = GetPortFieldFromAddrinfo(info); | 1940   uint16* port_field = GetPortFieldFromAddrinfo(info); | 
| 1920   if (!port_field) | 1941   if (!port_field) | 
| 1921     return -1; | 1942     return -1; | 
| 1922   return ntohs(*port_field); | 1943   return ntohs(*port_field); | 
| 1923 } | 1944 } | 
| 1924 | 1945 | 
| 1925 }  // namespace net | 1946 }  // namespace net | 
| OLD | NEW | 
|---|---|