OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "net/base/net_util.h" | 5 #include "net/base/net_util.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <iterator> | 8 #include <iterator> |
9 #include <map> | 9 #include <map> |
10 | 10 |
(...skipping 304 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
315 // (e.g. Characters like U+00A0, U+3000, U+3002 are omitted because | 315 // (e.g. Characters like U+00A0, U+3000, U+3002 are omitted because |
316 // they're gonna be canonicalized to U+0020 and full stop before | 316 // they're gonna be canonicalized to U+0020 and full stop before |
317 // reaching here.) | 317 // reaching here.) |
318 // The original list is available at | 318 // The original list is available at |
319 // http://kb.mozillazine.org/Network.IDN.blacklist_chars and | 319 // http://kb.mozillazine.org/Network.IDN.blacklist_chars and |
320 // at http://mxr.mozilla.org/seamonkey/source/modules/libpref/src/init/all.js#703 | 320 // at http://mxr.mozilla.org/seamonkey/source/modules/libpref/src/init/all.js#703 |
321 | 321 |
322 UErrorCode status = U_ZERO_ERROR; | 322 UErrorCode status = U_ZERO_ERROR; |
323 #ifdef U_WCHAR_IS_UTF16 | 323 #ifdef U_WCHAR_IS_UTF16 |
324 icu::UnicodeSet dangerous_characters(icu::UnicodeString( | 324 icu::UnicodeSet dangerous_characters(icu::UnicodeString( |
325 L"[[\\ \u00bc\u00bd\u01c3\u0337\u0338" | 325 L"[[\\ \u00ad\u00bc\u00bd\u01c3\u0337\u0338" |
326 L"\u05c3\u05f4\u06d4\u0702\u115f\u1160][\u2000-\u200b]" | 326 L"\u05c3\u05f4\u06d4\u0702\u115f\u1160][\u2000-\u200b]" |
327 L"[\u2024\u2027\u2028\u2029\u2039\u203a\u2044\u205f]" | 327 L"[\u2024\u2027\u2028\u2029\u2039\u203a\u2044\u205f]" |
328 L"[\u2154-\u2156][\u2159-\u215b][\u215f\u2215\u23ae" | 328 L"[\u2154-\u2156][\u2159-\u215b][\u215f\u2215\u23ae" |
329 L"\u29f6\u29f8\u2afb\u2afd][\u2ff0-\u2ffb][\u3014" | 329 L"\u29f6\u29f8\u2afb\u2afd][\u2ff0-\u2ffb][\u3014" |
330 L"\u3015\u3033\u3164\u321d\u321e\u33ae\u33af\u33c6\u33df\ufe14" | 330 L"\u3015\u3033\u3164\u321d\u321e\u33ae\u33af\u33c6\u33df\ufe14" |
331 L"\ufe15\ufe3f\ufe5d\ufe5e\ufeff\uff0e\uff06\uff61\uffa0\ufff9]" | 331 L"\ufe15\ufe3f\ufe5d\ufe5e\ufeff\uff0e\uff06\uff61\uffa0\ufff9]" |
332 L"[\ufffa-\ufffd]]"), status); | 332 L"[\ufffa-\ufffd]]"), status); |
333 DCHECK(U_SUCCESS(status)); | 333 DCHECK(U_SUCCESS(status)); |
334 icu::RegexMatcher dangerous_patterns(icu::UnicodeString( | 334 icu::RegexMatcher dangerous_patterns(icu::UnicodeString( |
335 // Lone katakana no, so, or n | 335 // Lone katakana no, so, or n |
336 L"[^\\p{Katakana}][\u30ce\u30f3\u30bd][^\\p{Katakana}]" | 336 L"[^\\p{Katakana}][\u30ce\u30f3\u30bd][^\\p{Katakana}]" |
337 // Repeating Japanese accent characters | 337 // Repeating Japanese accent characters |
338 L"|[\u3099\u309a\u309b\u309c][\u3099\u309a\u309b\u309c]"), | 338 L"|[\u3099\u309a\u309b\u309c][\u3099\u309a\u309b\u309c]"), |
339 0, status); | 339 0, status); |
340 #else | 340 #else |
341 icu::UnicodeSet dangerous_characters(icu::UnicodeString( | 341 icu::UnicodeSet dangerous_characters(icu::UnicodeString( |
342 "[[\\u0020\\u00bc\\u00bd\\u01c3\\u0337\\u0338" | 342 "[[\\u0020\\u00ad\\u00bc\\u00bd\\u01c3\\u0337\\u0338" |
343 "\\u05c3\\u05f4\\u06d4\\u0702\\u115f\\u1160][\\u2000-\\u200b]" | 343 "\\u05c3\\u05f4\\u06d4\\u0702\\u115f\\u1160][\\u2000-\\u200b]" |
344 "[\\u2024\\u2027\\u2028\\u2029\\u2039\\u203a\\u2044\\u205f]" | 344 "[\\u2024\\u2027\\u2028\\u2029\\u2039\\u203a\\u2044\\u205f]" |
345 "[\\u2154-\\u2156][\\u2159-\\u215b][\\u215f\\u2215\\u23ae" | 345 "[\\u2154-\\u2156][\\u2159-\\u215b][\\u215f\\u2215\\u23ae" |
346 "\\u29f6\\u29f8\\u2afb\\u2afd][\\u2ff0-\\u2ffb][\\u3014" | 346 "\\u29f6\\u29f8\\u2afb\\u2afd][\\u2ff0-\\u2ffb][\\u3014" |
347 "\\u3015\\u3033\\u3164\\u321d\\u321e\\u33ae\\u33af\\u33c6\\u33df\\ufe14" | 347 "\\u3015\\u3033\\u3164\\u321d\\u321e\\u33ae\\u33af\\u33c6\\u33df\\ufe14" |
348 "\\ufe15\\ufe3f\\ufe5d\\ufe5e\\ufeff\\uff0e\\uff06\\uff61\\uffa0\\ufff9]" | 348 "\\ufe15\\ufe3f\\ufe5d\\ufe5e\\ufeff\\uff0e\\uff06\\uff61\\uffa0\\ufff9]" |
349 "[\\ufffa-\\ufffd]]", -1, US_INV), status); | 349 "[\\ufffa-\\ufffd]]", -1, US_INV), status); |
350 DCHECK(U_SUCCESS(status)); | 350 DCHECK(U_SUCCESS(status)); |
351 icu::RegexMatcher dangerous_patterns(icu::UnicodeString( | 351 icu::RegexMatcher dangerous_patterns(icu::UnicodeString( |
352 // Lone katakana no, so, or n | 352 // Lone katakana no, so, or n |
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
389 component_characters.removeAll(common_characters); | 389 component_characters.removeAll(common_characters); |
390 | 390 |
391 base::StringTokenizer t(languages, ","); | 391 base::StringTokenizer t(languages, ","); |
392 while (t.GetNext()) { | 392 while (t.GetNext()) { |
393 if (IsComponentCoveredByLang(component_characters, t.token())) | 393 if (IsComponentCoveredByLang(component_characters, t.token())) |
394 return true; | 394 return true; |
395 } | 395 } |
396 return false; | 396 return false; |
397 } | 397 } |
398 | 398 |
| 399 static base::LazyInstance<base::Lock>::Leaky |
| 400 g_uidna_lock = LAZY_INSTANCE_INITIALIZER; |
| 401 |
399 // Converts one component of a host (between dots) to IDN if safe. The result | 402 // Converts one component of a host (between dots) to IDN if safe. The result |
400 // will be APPENDED to the given output string and will be the same as the input | 403 // will be APPENDED to the given output string and will be the same as the input |
401 // if it is not IDN or the IDN is unsafe to display. Returns whether any | 404 // if it is not IDN or the IDN is unsafe to display. Returns whether any |
402 // conversion was performed. | 405 // conversion was performed. |
403 bool IDNToUnicodeOneComponent(const base::char16* comp, | 406 bool IDNToUnicodeOneComponent(const base::char16* comp, |
404 size_t comp_len, | 407 size_t comp_len, |
405 const std::string& languages, | 408 const std::string& languages, |
406 base::string16* out) { | 409 base::string16* out) { |
407 DCHECK(out); | 410 DCHECK(out); |
408 if (comp_len == 0) | 411 if (comp_len == 0) |
409 return false; | 412 return false; |
410 | 413 |
| 414 |
411 // Only transform if the input can be an IDN component. | 415 // Only transform if the input can be an IDN component. |
412 static const base::char16 kIdnPrefix[] = {'x', 'n', '-', '-'}; | 416 static const base::char16 kIdnPrefix[] = {'x', 'n', '-', '-'}; |
413 if ((comp_len > arraysize(kIdnPrefix)) && | 417 if ((comp_len > arraysize(kIdnPrefix)) && |
414 !memcmp(comp, kIdnPrefix, arraysize(kIdnPrefix) * sizeof(base::char16))) { | 418 !memcmp(comp, kIdnPrefix, arraysize(kIdnPrefix) * sizeof(base::char16))) { |
415 // Repeatedly expand the output string until it's big enough. It looks like | 419 static UIDNA* uidna = NULL; // will be leaked. |
416 // ICU will return the required size of the buffer, but that's not | 420 { |
417 // documented, so we'll just grow by 2x. This should be rare and is not on a | 421 UErrorCode err = U_ZERO_ERROR; |
418 // critical path. | 422 base::AutoLock lock(g_uidna_lock.Get()); |
| 423 if (uidna == NULL) { |
| 424 // This is the option closest to what we had in the past with IDNA 2003 |
| 425 // API and matches what IE 10 does except for BiDi check. |
| 426 // IDNA 2003 always checks BiDi. We used to allow unassigned code |
| 427 // points. However, with our Unicode DB pretty up to date, we'd not |
| 428 // need to turn this on. |
| 429 // We didn't use STD3 rules and we continue not to. |
| 430 // TODO(jungshik) : Review and change options as different |
| 431 // parties (browsers, registrars, search engines) converge toward |
| 432 // a consensus. |
| 433 int32_t options = UIDNA_CHECK_BIDI; |
| 434 uidna = uidna_openUTS46(options, &err); |
| 435 if (U_FAILURE(err)) |
| 436 return false; |
| 437 } |
| 438 } |
419 size_t original_length = out->length(); | 439 size_t original_length = out->length(); |
420 for (int extra_space = 64; ; extra_space *= 2) { | 440 for (int output_length = 64; ; ) { |
421 UErrorCode status = U_ZERO_ERROR; | 441 UErrorCode status = U_ZERO_ERROR; |
422 out->resize(out->length() + extra_space); | 442 UIDNAInfo info = UIDNA_INFO_INITIALIZER; |
423 int output_chars = uidna_IDNToUnicode(comp, | 443 out->resize(original_length + output_length); |
424 static_cast<int32_t>(comp_len), &(*out)[original_length], extra_space, | 444 output_length = uidna_labelToUnicode(uidna, comp, |
425 UIDNA_DEFAULT, NULL, &status); | 445 static_cast<int32_t>(comp_len), &(*out)[original_length], |
426 if (status == U_ZERO_ERROR) { | 446 output_length, &info, &status); |
| 447 if (U_SUCCESS(status) && info.errors == 0) { |
427 // Converted successfully. | 448 // Converted successfully. |
428 out->resize(original_length + output_chars); | 449 out->resize(original_length + output_length); |
429 if (IsIDNComponentSafe(out->data() + original_length, output_chars, | 450 if (IsIDNComponentSafe(out->data() + original_length, output_length, |
430 languages)) | 451 languages)) |
431 return true; | 452 return true; |
432 } | 453 } |
433 | 454 |
434 if (status != U_BUFFER_OVERFLOW_ERROR) | 455 if (status != U_BUFFER_OVERFLOW_ERROR || info.errors != 0) |
435 break; | 456 break; |
436 } | 457 } |
437 // Failed, revert back to original string. | 458 // Failed, revert back to original string. |
438 out->resize(original_length); | 459 out->resize(original_length); |
439 } | 460 } |
440 | 461 |
441 // We get here with no IDN or on error, in which case we just append the | 462 // We get here with no IDN or on error, in which case we just append the |
442 // literal input. | 463 // literal input. |
443 out->append(comp, comp_len); | 464 out->append(comp, comp_len); |
444 return false; | 465 return false; |
(...skipping 1636 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2081 | 2102 |
2082 NetworkInterface::NetworkInterface(const std::string& name, | 2103 NetworkInterface::NetworkInterface(const std::string& name, |
2083 const IPAddressNumber& address) | 2104 const IPAddressNumber& address) |
2084 : name(name), address(address) { | 2105 : name(name), address(address) { |
2085 } | 2106 } |
2086 | 2107 |
2087 NetworkInterface::~NetworkInterface() { | 2108 NetworkInterface::~NetworkInterface() { |
2088 } | 2109 } |
2089 | 2110 |
2090 } // namespace net | 2111 } // namespace net |
OLD | NEW |