| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "net/base/net_util.h" | 5 #include "net/base/net_util.h" |
| 6 | 6 |
| 7 #include <map> | 7 #include <map> |
| 8 #include <vector> | 8 #include <vector> |
| 9 | 9 |
| 10 #include "base/i18n/time_formatting.h" | 10 #include "base/i18n/time_formatting.h" |
| (...skipping 176 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 187 // For now, we borrow the list from Mozilla and tweak it slightly. | 187 // For now, we borrow the list from Mozilla and tweak it slightly. |
| 188 // (e.g. Characters like U+00A0, U+3000, U+3002 are omitted because | 188 // (e.g. Characters like U+00A0, U+3000, U+3002 are omitted because |
| 189 // they will be canonicalized to U+0020 and full stop before | 189 // they will be canonicalized to U+0020 and full stop before |
| 190 // reaching here.) | 190 // reaching here.) |
| 191 // The original list is available at | 191 // The original list is available at |
| 192 // http://kb.mozillazine.org/Network.IDN.blacklist_chars and | 192 // http://kb.mozillazine.org/Network.IDN.blacklist_chars and |
| 193 // at http://mxr.mozilla.org/seamonkey/source/modules/libpref/src/init/all.js#
703 | 193 // at http://mxr.mozilla.org/seamonkey/source/modules/libpref/src/init/all.js#
703 |
| 194 | 194 |
| 195 UErrorCode status = U_ZERO_ERROR; | 195 UErrorCode status = U_ZERO_ERROR; |
| 196 #ifdef U_WCHAR_IS_UTF16 | 196 #ifdef U_WCHAR_IS_UTF16 |
| 197 icu::UnicodeSet dangerous_characters(icu::UnicodeString( | 197 icu::UnicodeSet dangerous_characters( |
| 198 L"[[\\ \u00ad\u00bc\u00bd\u01c3\u0337\u0338" | 198 icu::UnicodeString( |
| 199 L"\u05c3\u05f4\u06d4\u0702\u115f\u1160][\u2000-\u200b]" | 199 L"[[\\ \u00ad\u00bc\u00bd\u01c3\u0337\u0338" |
| 200 L"[\u2024\u2027\u2028\u2029\u2039\u203a\u2044\u205f]" | 200 L"\u05c3\u05f4\u06d4\u0702\u115f\u1160][\u2000-\u200b]" |
| 201 L"[\u2154-\u2156][\u2159-\u215b][\u215f\u2215\u23ae" | 201 L"[\u2024\u2027\u2028\u2029\u2039\u203a\u2044\u205f]" |
| 202 L"\u29f6\u29f8\u2afb\u2afd][\u2ff0-\u2ffb][\u3014" | 202 L"[\u2154-\u2156][\u2159-\u215b][\u215f\u2215\u23ae" |
| 203 L"\u3015\u3033\u3164\u321d\u321e\u33ae\u33af\u33c6\u33df\ufe14" | 203 L"\u29f6\u29f8\u2afb\u2afd][\u2ff0-\u2ffb][\u3014" |
| 204 L"\ufe15\ufe3f\ufe5d\ufe5e\ufeff\uff0e\uff06\uff61\uffa0\ufff9]" | 204 L"\u3015\u3033\u3164\u321d\u321e\u33ae\u33af\u33c6\u33df\ufe14" |
| 205 L"[\ufffa-\ufffd]]"), status); | 205 L"\ufe15\ufe3f\ufe5d\ufe5e\ufeff\uff0e\uff06\uff61\uffa0\ufff9]" |
| 206 L"[\ufffa-\ufffd]\U0001f50f\U0001f510\U0001f512\U0001f513]"), |
| 207 status); |
| 206 DCHECK(U_SUCCESS(status)); | 208 DCHECK(U_SUCCESS(status)); |
| 207 icu::RegexMatcher dangerous_patterns(icu::UnicodeString( | 209 icu::RegexMatcher dangerous_patterns(icu::UnicodeString( |
| 208 // Lone katakana no, so, or n | 210 // Lone katakana no, so, or n |
| 209 L"[^\\p{Katakana}][\u30ce\u30f3\u30bd][^\\p{Katakana}]" | 211 L"[^\\p{Katakana}][\u30ce\u30f3\u30bd][^\\p{Katakana}]" |
| 210 // Repeating Japanese accent characters | 212 // Repeating Japanese accent characters |
| 211 L"|[\u3099\u309a\u309b\u309c][\u3099\u309a\u309b\u309c]"), | 213 L"|[\u3099\u309a\u309b\u309c][\u3099\u309a\u309b\u309c]"), |
| 212 0, status); | 214 0, status); |
| 213 #else | 215 #else |
| 214 icu::UnicodeSet dangerous_characters(icu::UnicodeString( | 216 icu::UnicodeSet dangerous_characters(icu::UnicodeString( |
| 215 "[[\\u0020\\u00ad\\u00bc\\u00bd\\u01c3\\u0337\\u0338" | 217 "[[\\u0020\\u00ad\\u00bc\\u00bd\\u01c3\\u0337\\u0338" |
| 216 "\\u05c3\\u05f4\\u06d4\\u0702\\u115f\\u1160][\\u2000-\\u200b]" | 218 "\\u05c3\\u05f4\\u06d4\\u0702\\u115f\\u1160][\\u2000-\\u200b]" |
| 217 "[\\u2024\\u2027\\u2028\\u2029\\u2039\\u203a\\u2044\\u205f]" | 219 "[\\u2024\\u2027\\u2028\\u2029\\u2039\\u203a\\u2044\\u205f]" |
| 218 "[\\u2154-\\u2156][\\u2159-\\u215b][\\u215f\\u2215\\u23ae" | 220 "[\\u2154-\\u2156][\\u2159-\\u215b][\\u215f\\u2215\\u23ae" |
| 219 "\\u29f6\\u29f8\\u2afb\\u2afd][\\u2ff0-\\u2ffb][\\u3014" | 221 "\\u29f6\\u29f8\\u2afb\\u2afd][\\u2ff0-\\u2ffb][\\u3014" |
| 220 "\\u3015\\u3033\\u3164\\u321d\\u321e\\u33ae\\u33af\\u33c6\\u33df\\ufe14" | 222 "\\u3015\\u3033\\u3164\\u321d\\u321e\\u33ae\\u33af\\u33c6\\u33df\\ufe14" |
| 221 "\\ufe15\\ufe3f\\ufe5d\\ufe5e\\ufeff\\uff0e\\uff06\\uff61\\uffa0\\ufff9]" | 223 "\\ufe15\\ufe3f\\ufe5d\\ufe5e\\ufeff\\uff0e\\uff06\\uff61\\uffa0\\ufff9]" |
| 222 "[\\ufffa-\\ufffd]]", -1, US_INV), status); | 224 "[\\ufffa-\\ufffd]\\U0001f50f\\U0001f510\\U0001f512\\U0001f513]", -1, |
| 225 US_INV), status); |
| 223 DCHECK(U_SUCCESS(status)); | 226 DCHECK(U_SUCCESS(status)); |
| 224 icu::RegexMatcher dangerous_patterns(icu::UnicodeString( | 227 icu::RegexMatcher dangerous_patterns(icu::UnicodeString( |
| 225 // Lone katakana no, so, or n | 228 // Lone katakana no, so, or n |
| 226 "[^\\p{Katakana}][\\u30ce\\u30f3\\u30bd][^\\p{Katakana}]" | 229 "[^\\p{Katakana}][\\u30ce\\u30f3\\u30bd][^\\p{Katakana}]" |
| 227 // Repeating Japanese accent characters | 230 // Repeating Japanese accent characters |
| 228 "|[\\u3099\\u309a\\u309b\\u309c][\\u3099\\u309a\\u309b\\u309c]"), | 231 "|[\\u3099\\u309a\\u309b\\u309c][\\u3099\\u309a\\u309b\\u309c]"), |
| 229 0, status); | 232 0, status); |
| 230 #endif | 233 #endif |
| 231 DCHECK(U_SUCCESS(status)); | 234 DCHECK(U_SUCCESS(status)); |
| 232 icu::UnicodeSet component_characters; | 235 icu::UnicodeSet component_characters; |
| (...skipping 590 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 823 if (offset_for_adjustment) | 826 if (offset_for_adjustment) |
| 824 offsets.push_back(*offset_for_adjustment); | 827 offsets.push_back(*offset_for_adjustment); |
| 825 base::string16 result = FormatUrlWithOffsets(url, languages, format_types, | 828 base::string16 result = FormatUrlWithOffsets(url, languages, format_types, |
| 826 unescape_rules, new_parsed, prefix_end, &offsets); | 829 unescape_rules, new_parsed, prefix_end, &offsets); |
| 827 if (offset_for_adjustment) | 830 if (offset_for_adjustment) |
| 828 *offset_for_adjustment = offsets[0]; | 831 *offset_for_adjustment = offsets[0]; |
| 829 return result; | 832 return result; |
| 830 } | 833 } |
| 831 | 834 |
| 832 } // namespace net | 835 } // namespace net |
| OLD | NEW |