OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "net/base/net_util.h" | 5 #include "net/base/net_util.h" |
6 | 6 |
7 #include <map> | 7 #include <map> |
8 #include <vector> | 8 #include <vector> |
9 | 9 |
10 #include "base/i18n/time_formatting.h" | 10 #include "base/i18n/time_formatting.h" |
(...skipping 176 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
187 // For now, we borrow the list from Mozilla and tweak it slightly. | 187 // For now, we borrow the list from Mozilla and tweak it slightly. |
188 // (e.g. Characters like U+00A0, U+3000, U+3002 are omitted because | 188 // (e.g. Characters like U+00A0, U+3000, U+3002 are omitted because |
189 // they're going to be canonicalized to U+0020 and full stop before | 189 // they're going to be canonicalized to U+0020 and full stop before |
190 // reaching here.) | 190 // reaching here.) |
191 // The original list is available at | 191 // The original list is available at |
192 // http://kb.mozillazine.org/Network.IDN.blacklist_chars and | 192 // http://kb.mozillazine.org/Network.IDN.blacklist_chars and |
193 // at http://mxr.mozilla.org/seamonkey/source/modules/libpref/src/init/all.js#
703 | 193 // at http://mxr.mozilla.org/seamonkey/source/modules/libpref/src/init/all.js#
703 |
194 | 194 |
195 UErrorCode status = U_ZERO_ERROR; | 195 UErrorCode status = U_ZERO_ERROR; |
196 #ifdef U_WCHAR_IS_UTF16 | 196 #ifdef U_WCHAR_IS_UTF16 |
197 icu::UnicodeSet dangerous_characters(icu::UnicodeString( | 197 icu::UnicodeSet dangerous_characters( |
198 L"[[\\ \u00ad\u00bc\u00bd\u01c3\u0337\u0338" | 198 icu::UnicodeString( |
199 L"\u05c3\u05f4\u06d4\u0702\u115f\u1160][\u2000-\u200b]" | 199 L"[[\\ \u00ad\u00bc\u00bd\u01c3\u0337\u0338" |
200 L"[\u2024\u2027\u2028\u2029\u2039\u203a\u2044\u205f]" | 200 L"\u05c3\u05f4\u06d4\u0702\u115f\u1160][\u2000-\u200b]" |
201 L"[\u2154-\u2156][\u2159-\u215b][\u215f\u2215\u23ae" | 201 L"[\u2024\u2027\u2028\u2029\u2039\u203a\u2044\u205f]" |
202 L"\u29f6\u29f8\u2afb\u2afd][\u2ff0-\u2ffb][\u3014" | 202 L"[\u2154-\u2156][\u2159-\u215b][\u215f\u2215\u23ae" |
203 L"\u3015\u3033\u3164\u321d\u321e\u33ae\u33af\u33c6\u33df\ufe14" | 203 L"\u29f6\u29f8\u2afb\u2afd][\u2ff0-\u2ffb][\u3014" |
204 L"\ufe15\ufe3f\ufe5d\ufe5e\ufeff\uff0e\uff06\uff61\uffa0\ufff9]" | 204 L"\u3015\u3033\u3164\u321d\u321e\u33ae\u33af\u33c6\u33df\ufe14" |
205 L"[\ufffa-\ufffd]]"), status); | 205 L"\ufe15\ufe3f\ufe5d\ufe5e\ufeff\uff0e\uff06\uff61\uffa0\ufff9]" |
| 206 L"[\ufffa-\ufffd]\U0001f50f\U0001f510\U0001f512\U0001f513]"), |
| 207 status); |
206 DCHECK(U_SUCCESS(status)); | 208 DCHECK(U_SUCCESS(status)); |
207 icu::RegexMatcher dangerous_patterns(icu::UnicodeString( | 209 icu::RegexMatcher dangerous_patterns(icu::UnicodeString( |
208 // Lone katakana no, so, or n | 210 // Lone katakana no, so, or n |
209 L"[^\\p{Katakana}][\u30ce\u30f3\u30bd][^\\p{Katakana}]" | 211 L"[^\\p{Katakana}][\u30ce\u30f3\u30bd][^\\p{Katakana}]" |
210 // Repeating Japanese accent characters | 212 // Repeating Japanese accent characters |
211 L"|[\u3099\u309a\u309b\u309c][\u3099\u309a\u309b\u309c]"), | 213 L"|[\u3099\u309a\u309b\u309c][\u3099\u309a\u309b\u309c]"), |
212 0, status); | 214 0, status); |
213 #else | 215 #else |
214 icu::UnicodeSet dangerous_characters(icu::UnicodeString( | 216 icu::UnicodeSet dangerous_characters(icu::UnicodeString( |
215 "[[\\u0020\\u00ad\\u00bc\\u00bd\\u01c3\\u0337\\u0338" | 217 "[[\\u0020\\u00ad\\u00bc\\u00bd\\u01c3\\u0337\\u0338" |
216 "\\u05c3\\u05f4\\u06d4\\u0702\\u115f\\u1160][\\u2000-\\u200b]" | 218 "\\u05c3\\u05f4\\u06d4\\u0702\\u115f\\u1160][\\u2000-\\u200b]" |
217 "[\\u2024\\u2027\\u2028\\u2029\\u2039\\u203a\\u2044\\u205f]" | 219 "[\\u2024\\u2027\\u2028\\u2029\\u2039\\u203a\\u2044\\u205f]" |
218 "[\\u2154-\\u2156][\\u2159-\\u215b][\\u215f\\u2215\\u23ae" | 220 "[\\u2154-\\u2156][\\u2159-\\u215b][\\u215f\\u2215\\u23ae" |
219 "\\u29f6\\u29f8\\u2afb\\u2afd][\\u2ff0-\\u2ffb][\\u3014" | 221 "\\u29f6\\u29f8\\u2afb\\u2afd][\\u2ff0-\\u2ffb][\\u3014" |
220 "\\u3015\\u3033\\u3164\\u321d\\u321e\\u33ae\\u33af\\u33c6\\u33df\\ufe14" | 222 "\\u3015\\u3033\\u3164\\u321d\\u321e\\u33ae\\u33af\\u33c6\\u33df\\ufe14" |
221 "\\ufe15\\ufe3f\\ufe5d\\ufe5e\\ufeff\\uff0e\\uff06\\uff61\\uffa0\\ufff9]" | 223 "\\ufe15\\ufe3f\\ufe5d\\ufe5e\\ufeff\\uff0e\\uff06\\uff61\\uffa0\\ufff9]" |
222 "[\\ufffa-\\ufffd]]", -1, US_INV), status); | 224 "[\\ufffa-\\ufffd]\\U0001f50f\\U0001f510\\U0001f512\\U0001f513]", -1, |
| 225 US_INV), status); |
223 DCHECK(U_SUCCESS(status)); | 226 DCHECK(U_SUCCESS(status)); |
224 icu::RegexMatcher dangerous_patterns(icu::UnicodeString( | 227 icu::RegexMatcher dangerous_patterns(icu::UnicodeString( |
225 // Lone katakana no, so, or n | 228 // Lone katakana no, so, or n |
226 "[^\\p{Katakana}][\\u30ce\\u30f3\\u30bd][^\\p{Katakana}]" | 229 "[^\\p{Katakana}][\\u30ce\\u30f3\\u30bd][^\\p{Katakana}]" |
227 // Repeating Japanese accent characters | 230 // Repeating Japanese accent characters |
228 "|[\\u3099\\u309a\\u309b\\u309c][\\u3099\\u309a\\u309b\\u309c]"), | 231 "|[\\u3099\\u309a\\u309b\\u309c][\\u3099\\u309a\\u309b\\u309c]"), |
229 0, status); | 232 0, status); |
230 #endif | 233 #endif |
231 DCHECK(U_SUCCESS(status)); | 234 DCHECK(U_SUCCESS(status)); |
232 icu::UnicodeSet component_characters; | 235 icu::UnicodeSet component_characters; |
(...skipping 590 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
823 if (offset_for_adjustment) | 826 if (offset_for_adjustment) |
824 offsets.push_back(*offset_for_adjustment); | 827 offsets.push_back(*offset_for_adjustment); |
825 base::string16 result = FormatUrlWithOffsets(url, languages, format_types, | 828 base::string16 result = FormatUrlWithOffsets(url, languages, format_types, |
826 unescape_rules, new_parsed, prefix_end, &offsets); | 829 unescape_rules, new_parsed, prefix_end, &offsets); |
827 if (offset_for_adjustment) | 830 if (offset_for_adjustment) |
828 *offset_for_adjustment = offsets[0]; | 831 *offset_for_adjustment = offsets[0]; |
829 return result; | 832 return result; |
830 } | 833 } |
831 | 834 |
832 } // namespace net | 835 } // namespace net |
OLD | NEW |