OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "net/base/net_util.h" | 5 #include "net/base/net_util.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <iterator> | 8 #include <iterator> |
9 #include <map> | 9 #include <map> |
10 | 10 |
(...skipping 306 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
317 // (e.g. Characters like U+00A0, U+3000, U+3002 are omitted because | 317 // (e.g. Characters like U+00A0, U+3000, U+3002 are omitted because |
318 // they're gonna be canonicalized to U+0020 and full stop before | 318 // they're gonna be canonicalized to U+0020 and full stop before |
319 // reaching here.) | 319 // reaching here.) |
320 // The original list is available at | 320 // The original list is available at |
321 // http://kb.mozillazine.org/Network.IDN.blacklist_chars and | 321 // http://kb.mozillazine.org/Network.IDN.blacklist_chars and |
322 // at http://mxr.mozilla.org/seamonkey/source/modules/libpref/src/init/all.js#703 | 322 // at http://mxr.mozilla.org/seamonkey/source/modules/libpref/src/init/all.js#703 |
323 | 323 |
324 UErrorCode status = U_ZERO_ERROR; | 324 UErrorCode status = U_ZERO_ERROR; |
325 #ifdef U_WCHAR_IS_UTF16 | 325 #ifdef U_WCHAR_IS_UTF16 |
326 icu::UnicodeSet dangerous_characters(icu::UnicodeString( | 326 icu::UnicodeSet dangerous_characters(icu::UnicodeString( |
327 L"[[\\ \u00bc\u00bd\u01c3\u0337\u0338" | 327 L"[[\\ \u00ad\u00bc\u00bd\u01c3\u0337\u0338" |
328 L"\u05c3\u05f4\u06d4\u0702\u115f\u1160][\u2000-\u200b]" | 328 L"\u05c3\u05f4\u06d4\u0702\u115f\u1160][\u2000-\u200b]" |
329 L"[\u2024\u2027\u2028\u2029\u2039\u203a\u2044\u205f]" | 329 L"[\u2024\u2027\u2028\u2029\u2039\u203a\u2044\u205f]" |
330 L"[\u2154-\u2156][\u2159-\u215b][\u215f\u2215\u23ae" | 330 L"[\u2154-\u2156][\u2159-\u215b][\u215f\u2215\u23ae" |
331 L"\u29f6\u29f8\u2afb\u2afd][\u2ff0-\u2ffb][\u3014" | 331 L"\u29f6\u29f8\u2afb\u2afd][\u2ff0-\u2ffb][\u3014" |
332 L"\u3015\u3033\u3164\u321d\u321e\u33ae\u33af\u33c6\u33df\ufe14" | 332 L"\u3015\u3033\u3164\u321d\u321e\u33ae\u33af\u33c6\u33df\ufe14" |
333 L"\ufe15\ufe3f\ufe5d\ufe5e\ufeff\uff0e\uff06\uff61\uffa0\ufff9]" | 333 L"\ufe15\ufe3f\ufe5d\ufe5e\ufeff\uff0e\uff06\uff61\uffa0\ufff9]" |
334 L"[\ufffa-\ufffd]]"), status); | 334 L"[\ufffa-\ufffd]]"), status); |
335 DCHECK(U_SUCCESS(status)); | 335 DCHECK(U_SUCCESS(status)); |
336 icu::RegexMatcher dangerous_patterns(icu::UnicodeString( | 336 icu::RegexMatcher dangerous_patterns(icu::UnicodeString( |
337 // Lone katakana no, so, or n | 337 // Lone katakana no, so, or n |
338 L"[^\\p{Katakana}][\u30ce\u30f3\u30bd][^\\p{Katakana}]" | 338 L"[^\\p{Katakana}][\u30ce\u30f3\u30bd][^\\p{Katakana}]" |
339 // Repeating Japanese accent characters | 339 // Repeating Japanese accent characters |
340 L"|[\u3099\u309a\u309b\u309c][\u3099\u309a\u309b\u309c]"), | 340 L"|[\u3099\u309a\u309b\u309c][\u3099\u309a\u309b\u309c]"), |
341 0, status); | 341 0, status); |
342 #else | 342 #else |
343 icu::UnicodeSet dangerous_characters(icu::UnicodeString( | 343 icu::UnicodeSet dangerous_characters(icu::UnicodeString( |
344 "[[\\u0020\\u00bc\\u00bd\\u01c3\\u0337\\u0338" | 344 "[[\\u0020\\u00ad\\u00bc\\u00bd\\u01c3\\u0337\\u0338" |
345 "\\u05c3\\u05f4\\u06d4\\u0702\\u115f\\u1160][\\u2000-\\u200b]" | 345 "\\u05c3\\u05f4\\u06d4\\u0702\\u115f\\u1160][\\u2000-\\u200b]" |
346 "[\\u2024\\u2027\\u2028\\u2029\\u2039\\u203a\\u2044\\u205f]" | 346 "[\\u2024\\u2027\\u2028\\u2029\\u2039\\u203a\\u2044\\u205f]" |
347 "[\\u2154-\\u2156][\\u2159-\\u215b][\\u215f\\u2215\\u23ae" | 347 "[\\u2154-\\u2156][\\u2159-\\u215b][\\u215f\\u2215\\u23ae" |
348 "\\u29f6\\u29f8\\u2afb\\u2afd][\\u2ff0-\\u2ffb][\\u3014" | 348 "\\u29f6\\u29f8\\u2afb\\u2afd][\\u2ff0-\\u2ffb][\\u3014" |
349 "\\u3015\\u3033\\u3164\\u321d\\u321e\\u33ae\\u33af\\u33c6\\u33df\\ufe14" | 349 "\\u3015\\u3033\\u3164\\u321d\\u321e\\u33ae\\u33af\\u33c6\\u33df\\ufe14" |
350 "\\ufe15\\ufe3f\\ufe5d\\ufe5e\\ufeff\\uff0e\\uff06\\uff61\\uffa0\\ufff9]" | 350 "\\ufe15\\ufe3f\\ufe5d\\ufe5e\\ufeff\\uff0e\\uff06\\uff61\\uffa0\\ufff9]" |
351 "[\\ufffa-\\ufffd]]", -1, US_INV), status); | 351 "[\\ufffa-\\ufffd]]", -1, US_INV), status); |
352 DCHECK(U_SUCCESS(status)); | 352 DCHECK(U_SUCCESS(status)); |
353 icu::RegexMatcher dangerous_patterns(icu::UnicodeString( | 353 icu::RegexMatcher dangerous_patterns(icu::UnicodeString( |
354 // Lone katakana no, so, or n | 354 // Lone katakana no, so, or n |
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
391 component_characters.removeAll(common_characters); | 391 component_characters.removeAll(common_characters); |
392 | 392 |
393 base::StringTokenizer t(languages, ","); | 393 base::StringTokenizer t(languages, ","); |
394 while (t.GetNext()) { | 394 while (t.GetNext()) { |
395 if (IsComponentCoveredByLang(component_characters, t.token())) | 395 if (IsComponentCoveredByLang(component_characters, t.token())) |
396 return true; | 396 return true; |
397 } | 397 } |
398 return false; | 398 return false; |
399 } | 399 } |
400 | 400 |
| 401 // A wrapper to use LazyInstance<>::Leaky with ICU's UIDNA, a C pointer to |
| 402 // a UTS46/IDNA 2008 handling object opened with uidna_openUTS46(). |
| 403 // |
| 404 // We use UTS46 with BiDiCheck to migrate from IDNA 2003 to IDNA 2008 with |
| 405 // the backward compatibility in mind. What it does: |
| 406 // |
| 407 // 1. Use the up-to-date Unicode data. |
| 408 // 2. Define a case folding/mapping with the up-to-date Unicode data as |
| 409 // in IDNA 2003. |
| 410 // 3. Use the transitional mechanism for the 4 deviation characters (sharp-s, |
| 411 // final sigma, ZWJ and ZWNJ) for now. |
| 412 // 4. Continue to allow symbols and punctuation. |
| 413 // 5. Apply new BiDi check rules more permissive than the IDNA 2003 BiDi rules. |
| 414 // 6. Do not apply STD3 rules. |
| 415 // 7. Do not allow unassigned code points. |
| 416 // |
| 417 // It also closely matches what IE 10 does except for the BiDi check ( |
| 418 // http://goo.gl/3XBhqw ). |
| 419 // See http://unicode.org/reports/tr46/ and references therein |
| 420 // for more details. |
| 421 struct UIDNAWrapper { |
| 422 UIDNAWrapper() { |
| 423 UErrorCode err = U_ZERO_ERROR; |
| 424 // TODO(jungshik): Change options as different parties (browsers, |
| 425 // registrars, search engines) converge toward a consensus. |
| 426 value = uidna_openUTS46(UIDNA_CHECK_BIDI, &err); |
| 427 if (U_FAILURE(err)) |
| 428 value = NULL; |
| 429 } |
| 430 |
| 431 UIDNA* value; |
| 432 }; |
| 433 |
| 434 static base::LazyInstance<UIDNAWrapper>::Leaky |
| 435 g_uidna = LAZY_INSTANCE_INITIALIZER; |
| 436 |
401 // Converts one component of a host (between dots) to IDN if safe. The result | 437 // Converts one component of a host (between dots) to IDN if safe. The result |
402 // will be APPENDED to the given output string and will be the same as the input | 438 // will be APPENDED to the given output string and will be the same as the input |
403 // if it is not IDN or the IDN is unsafe to display. Returns whether any | 439 // if it is not IDN or the IDN is unsafe to display. Returns whether any |
404 // conversion was performed. | 440 // conversion was performed. |
405 bool IDNToUnicodeOneComponent(const base::char16* comp, | 441 bool IDNToUnicodeOneComponent(const base::char16* comp, |
406 size_t comp_len, | 442 size_t comp_len, |
407 const std::string& languages, | 443 const std::string& languages, |
408 base::string16* out) { | 444 base::string16* out) { |
409 DCHECK(out); | 445 DCHECK(out); |
410 if (comp_len == 0) | 446 if (comp_len == 0) |
411 return false; | 447 return false; |
412 | 448 |
413 // Only transform if the input can be an IDN component. | 449 // Only transform if the input can be an IDN component. |
414 static const base::char16 kIdnPrefix[] = {'x', 'n', '-', '-'}; | 450 static const base::char16 kIdnPrefix[] = {'x', 'n', '-', '-'}; |
415 if ((comp_len > arraysize(kIdnPrefix)) && | 451 if ((comp_len > arraysize(kIdnPrefix)) && |
416 !memcmp(comp, kIdnPrefix, arraysize(kIdnPrefix) * sizeof(base::char16))) { | 452 !memcmp(comp, kIdnPrefix, arraysize(kIdnPrefix) * sizeof(base::char16))) { |
417 // Repeatedly expand the output string until it's big enough. It looks like | 453 UIDNA* uidna = g_uidna.Get().value; |
418 // ICU will return the required size of the buffer, but that's not | 454 DCHECK(uidna != NULL); |
419 // documented, so we'll just grow by 2x. This should be rare and is not on a | |
420 // critical path. | |
421 size_t original_length = out->length(); | 455 size_t original_length = out->length(); |
422 for (int extra_space = 64; ; extra_space *= 2) { | 456 int output_length = 64; |
423 UErrorCode status = U_ZERO_ERROR; | 457 UIDNAInfo info = UIDNA_INFO_INITIALIZER; |
424 out->resize(out->length() + extra_space); | 458 UErrorCode status; |
425 int output_chars = uidna_IDNToUnicode(comp, | 459 do { |
426 static_cast<int32_t>(comp_len), &(*out)[original_length], extra_space, | 460 out->resize(original_length + output_length); |
427 UIDNA_DEFAULT, NULL, &status); | 461 status = U_ZERO_ERROR; |
428 if (status == U_ZERO_ERROR) { | 462 // This returns the actual length required. If this is more than 64 |
429 // Converted successfully. | 463 // code units, |status| will be U_BUFFER_OVERFLOW_ERROR and we'll try |
430 out->resize(original_length + output_chars); | 464 // the conversion again, but with a sufficiently large buffer. |
431 if (IsIDNComponentSafe(out->data() + original_length, output_chars, | 465 output_length = uidna_labelToUnicode( |
432 languages)) | 466 uidna, comp, static_cast<int32_t>(comp_len), &(*out)[original_length], |
433 return true; | 467 output_length, &info, &status); |
434 } | 468 } while ((status == U_BUFFER_OVERFLOW_ERROR && info.errors == 0)); |
435 | 469 |
436 if (status != U_BUFFER_OVERFLOW_ERROR) | 470 if (U_SUCCESS(status) && info.errors == 0) { |
437 break; | 471 // Converted successfully. Ensure that the converted component |
| 472 // can be safely displayed to the user. |
| 473 out->resize(original_length + output_length); |
| 474 if (IsIDNComponentSafe(out->data() + original_length, output_length, |
| 475 languages)) |
| 476 return true; |
438 } | 477 } |
439 // Failed, revert back to original string. | 478 |
| 479 // Something went wrong. Revert to original string. |
440 out->resize(original_length); | 480 out->resize(original_length); |
441 } | 481 } |
442 | 482 |
443 // We get here with no IDN or on error, in which case we just append the | 483 // We get here with no IDN or on error, in which case we just append the |
444 // literal input. | 484 // literal input. |
445 out->append(comp, comp_len); | 485 out->append(comp, comp_len); |
446 return false; | 486 return false; |
447 } | 487 } |
448 | 488 |
449 // Clamps the offsets in |offsets_for_adjustment| to the length of |str|. | 489 // Clamps the offsets in |offsets_for_adjustment| to the length of |str|. |
(...skipping 1697 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2147 | 2187 |
2148 NetworkInterface::NetworkInterface(const std::string& name, | 2188 NetworkInterface::NetworkInterface(const std::string& name, |
2149 const IPAddressNumber& address) | 2189 const IPAddressNumber& address) |
2150 : name(name), address(address) { | 2190 : name(name), address(address) { |
2151 } | 2191 } |
2152 | 2192 |
2153 NetworkInterface::~NetworkInterface() { | 2193 NetworkInterface::~NetworkInterface() { |
2154 } | 2194 } |
2155 | 2195 |
2156 } // namespace net | 2196 } // namespace net |
OLD | NEW |