OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "net/base/net_util.h" | 5 #include "net/base/net_util.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <iterator> | 8 #include <iterator> |
9 #include <map> | 9 #include <map> |
10 | 10 |
(...skipping 306 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
317 // (e.g. Characters like U+00A0, U+3000, U+3002 are omitted because | 317 // (e.g. Characters like U+00A0, U+3000, U+3002 are omitted because |
318 // they're gonna be canonicalized to U+0020 and full stop before | 318 // they're gonna be canonicalized to U+0020 and full stop before |
319 // reaching here.) | 319 // reaching here.) |
320 // The original list is available at | 320 // The original list is available at |
321 // http://kb.mozillazine.org/Network.IDN.blacklist_chars and | 321 // http://kb.mozillazine.org/Network.IDN.blacklist_chars and |
322 // at http://mxr.mozilla.org/seamonkey/source/modules/libpref/src/init/all.js#703 | 322 // at http://mxr.mozilla.org/seamonkey/source/modules/libpref/src/init/all.js#703 |
323 | 323 |
324 UErrorCode status = U_ZERO_ERROR; | 324 UErrorCode status = U_ZERO_ERROR; |
325 #ifdef U_WCHAR_IS_UTF16 | 325 #ifdef U_WCHAR_IS_UTF16 |
326 icu::UnicodeSet dangerous_characters(icu::UnicodeString( | 326 icu::UnicodeSet dangerous_characters(icu::UnicodeString( |
327 L"[[\\ \u00bc\u00bd\u01c3\u0337\u0338" | 327 L"[[\\ \u00ad\u00bc\u00bd\u01c3\u0337\u0338" |
328 L"\u05c3\u05f4\u06d4\u0702\u115f\u1160][\u2000-\u200b]" | 328 L"\u05c3\u05f4\u06d4\u0702\u115f\u1160][\u2000-\u200b]" |
329 L"[\u2024\u2027\u2028\u2029\u2039\u203a\u2044\u205f]" | 329 L"[\u2024\u2027\u2028\u2029\u2039\u203a\u2044\u205f]" |
330 L"[\u2154-\u2156][\u2159-\u215b][\u215f\u2215\u23ae" | 330 L"[\u2154-\u2156][\u2159-\u215b][\u215f\u2215\u23ae" |
331 L"\u29f6\u29f8\u2afb\u2afd][\u2ff0-\u2ffb][\u3014" | 331 L"\u29f6\u29f8\u2afb\u2afd][\u2ff0-\u2ffb][\u3014" |
332 L"\u3015\u3033\u3164\u321d\u321e\u33ae\u33af\u33c6\u33df\ufe14" | 332 L"\u3015\u3033\u3164\u321d\u321e\u33ae\u33af\u33c6\u33df\ufe14" |
333 L"\ufe15\ufe3f\ufe5d\ufe5e\ufeff\uff0e\uff06\uff61\uffa0\ufff9]" | 333 L"\ufe15\ufe3f\ufe5d\ufe5e\ufeff\uff0e\uff06\uff61\uffa0\ufff9]" |
334 L"[\ufffa-\ufffd]]"), status); | 334 L"[\ufffa-\ufffd]]"), status); |
335 DCHECK(U_SUCCESS(status)); | 335 DCHECK(U_SUCCESS(status)); |
336 icu::RegexMatcher dangerous_patterns(icu::UnicodeString( | 336 icu::RegexMatcher dangerous_patterns(icu::UnicodeString( |
337 // Lone katakana no, so, or n | 337 // Lone katakana no, so, or n |
338 L"[^\\p{Katakana}][\u30ce\u30f3\u30bd][^\\p{Katakana}]" | 338 L"[^\\p{Katakana}][\u30ce\u30f3\u30bd][^\\p{Katakana}]" |
339 // Repeating Japanese accent characters | 339 // Repeating Japanese accent characters |
340 L"|[\u3099\u309a\u309b\u309c][\u3099\u309a\u309b\u309c]"), | 340 L"|[\u3099\u309a\u309b\u309c][\u3099\u309a\u309b\u309c]"), |
341 0, status); | 341 0, status); |
342 #else | 342 #else |
343 icu::UnicodeSet dangerous_characters(icu::UnicodeString( | 343 icu::UnicodeSet dangerous_characters(icu::UnicodeString( |
344 "[[\\u0020\\u00bc\\u00bd\\u01c3\\u0337\\u0338" | 344 "[[\\u0020\\u00ad\\u00bc\\u00bd\\u01c3\\u0337\\u0338" |
345 "\\u05c3\\u05f4\\u06d4\\u0702\\u115f\\u1160][\\u2000-\\u200b]" | 345 "\\u05c3\\u05f4\\u06d4\\u0702\\u115f\\u1160][\\u2000-\\u200b]" |
346 "[\\u2024\\u2027\\u2028\\u2029\\u2039\\u203a\\u2044\\u205f]" | 346 "[\\u2024\\u2027\\u2028\\u2029\\u2039\\u203a\\u2044\\u205f]" |
347 "[\\u2154-\\u2156][\\u2159-\\u215b][\\u215f\\u2215\\u23ae" | 347 "[\\u2154-\\u2156][\\u2159-\\u215b][\\u215f\\u2215\\u23ae" |
348 "\\u29f6\\u29f8\\u2afb\\u2afd][\\u2ff0-\\u2ffb][\\u3014" | 348 "\\u29f6\\u29f8\\u2afb\\u2afd][\\u2ff0-\\u2ffb][\\u3014" |
349 "\\u3015\\u3033\\u3164\\u321d\\u321e\\u33ae\\u33af\\u33c6\\u33df\\ufe14" | 349 "\\u3015\\u3033\\u3164\\u321d\\u321e\\u33ae\\u33af\\u33c6\\u33df\\ufe14" |
350 "\\ufe15\\ufe3f\\ufe5d\\ufe5e\\ufeff\\uff0e\\uff06\\uff61\\uffa0\\ufff9]" | 350 "\\ufe15\\ufe3f\\ufe5d\\ufe5e\\ufeff\\uff0e\\uff06\\uff61\\uffa0\\ufff9]" |
351 "[\\ufffa-\\ufffd]]", -1, US_INV), status); | 351 "[\\ufffa-\\ufffd]]", -1, US_INV), status); |
352 DCHECK(U_SUCCESS(status)); | 352 DCHECK(U_SUCCESS(status)); |
353 icu::RegexMatcher dangerous_patterns(icu::UnicodeString( | 353 icu::RegexMatcher dangerous_patterns(icu::UnicodeString( |
354 // Lone katakana no, so, or n | 354 // Lone katakana no, so, or n |
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
391 component_characters.removeAll(common_characters); | 391 component_characters.removeAll(common_characters); |
392 | 392 |
393 base::StringTokenizer t(languages, ","); | 393 base::StringTokenizer t(languages, ","); |
394 while (t.GetNext()) { | 394 while (t.GetNext()) { |
395 if (IsComponentCoveredByLang(component_characters, t.token())) | 395 if (IsComponentCoveredByLang(component_characters, t.token())) |
396 return true; | 396 return true; |
397 } | 397 } |
398 return false; | 398 return false; |
399 } | 399 } |
400 | 400 |
401 struct uidna_wrapper { | |
Peter Kasting
2013/09/19 20:55:36
Struct names should be CamelCase.
jungshik at Google
2013/09/20 21:33:41
Done.
| |
402 uidna_wrapper() { | |
403 UErrorCode err = U_ZERO_ERROR; | |
404 // This is the option closest to what we had in the past with IDNA 2003 | |
Peter Kasting
2013/09/19 20:55:36
Never write in a comment about what "used to" happ
jungshik at Google
2013/09/20 21:33:41
Done. I just kept TODO comment here and explained
| |
405 // API and matches what IE 10 does except for BiDi check. | |
406 // IDNA 2003 always checks BiDi. We used to allow unassigned code | |
407 // points. However, with our Unicode DB pretty up to date, we'd not | |
408 // need to turn this on. | |
409 // We didn't use STD3 rules and we continue not to. | |
410 // TODO(jungshik) : Change options as different parties (browsers, | |
411 // registrars, search engines) converge toward a consensus. | |
412 int32_t options = UIDNA_CHECK_BIDI; | |
Peter Kasting
2013/09/19 20:55:36
Nit: Don't make a temp for this, just inline it.
jungshik at Google
2013/09/20 21:33:41
Done.
| |
413 value = uidna_openUTS46(options, &err); | |
414 if (U_FAILURE(err)) | |
415 value = NULL; | |
416 } | |
417 | |
418 UIDNA* value; | |
419 }; | |
420 | |
421 static base::LazyInstance<uidna_wrapper>::Leaky | |
422 g_uidna = LAZY_INSTANCE_INITIALIZER; | |
423 | |
401 // Converts one component of a host (between dots) to IDN if safe. The result | 424 // Converts one component of a host (between dots) to IDN if safe. The result |
402 // will be APPENDED to the given output string and will be the same as the input | 425 // will be APPENDED to the given output string and will be the same as the input |
403 // if it is not IDN or the IDN is unsafe to display. Returns whether any | 426 // if it is not IDN or the IDN is unsafe to display. Returns whether any |
404 // conversion was performed. | 427 // conversion was performed. |
405 bool IDNToUnicodeOneComponent(const base::char16* comp, | 428 bool IDNToUnicodeOneComponent(const base::char16* comp, |
406 size_t comp_len, | 429 size_t comp_len, |
407 const std::string& languages, | 430 const std::string& languages, |
408 base::string16* out) { | 431 base::string16* out) { |
409 DCHECK(out); | 432 DCHECK(out); |
410 if (comp_len == 0) | 433 if (comp_len == 0) |
411 return false; | 434 return false; |
412 | 435 |
413 // Only transform if the input can be an IDN component. | 436 // Only transform if the input can be an IDN component. |
414 static const base::char16 kIdnPrefix[] = {'x', 'n', '-', '-'}; | 437 static const base::char16 kIdnPrefix[] = {'x', 'n', '-', '-'}; |
415 if ((comp_len > arraysize(kIdnPrefix)) && | 438 if ((comp_len > arraysize(kIdnPrefix)) && |
416 !memcmp(comp, kIdnPrefix, arraysize(kIdnPrefix) * sizeof(base::char16))) { | 439 !memcmp(comp, kIdnPrefix, arraysize(kIdnPrefix) * sizeof(base::char16))) { |
417 // Repeatedly expand the output string until it's big enough. It looks like | 440 |
418 // ICU will return the required size of the buffer, but that's not | 441 UIDNA* uidna = g_uidna.Get().value; |
419 // documented, so we'll just grow by 2x. This should be rare and is not on a | 442 DCHECK(uidna != NULL); |
420 // critical path. | |
421 size_t original_length = out->length(); | 443 size_t original_length = out->length(); |
422 for (int extra_space = 64; ; extra_space *= 2) { | 444 for (int output_length = 64; ; ) { |
Peter Kasting
2013/09/19 20:55:36
How about writing the loop this way:
// Try t
jungshik at Google
2013/09/20 21:33:41
Done with a slight change because UErrorCode has t
| |
423 UErrorCode status = U_ZERO_ERROR; | 445 UErrorCode status = U_ZERO_ERROR; |
424 out->resize(out->length() + extra_space); | 446 UIDNAInfo info = UIDNA_INFO_INITIALIZER; |
425 int output_chars = uidna_IDNToUnicode(comp, | 447 out->resize(original_length + output_length); |
426 static_cast<int32_t>(comp_len), &(*out)[original_length], extra_space, | 448 output_length = uidna_labelToUnicode(uidna, comp, |
427 UIDNA_DEFAULT, NULL, &status); | 449 static_cast<int32_t>(comp_len), &(*out)[original_length], |
428 if (status == U_ZERO_ERROR) { | 450 output_length, &info, &status); |
451 if (U_SUCCESS(status) && info.errors == 0) { | |
429 // Converted successfully. | 452 // Converted successfully. |
430 out->resize(original_length + output_chars); | 453 out->resize(original_length + output_length); |
431 if (IsIDNComponentSafe(out->data() + original_length, output_chars, | 454 if (IsIDNComponentSafe(out->data() + original_length, output_length, |
432 languages)) | 455 languages)) |
433 return true; | 456 return true; |
434 } | 457 } |
435 | 458 |
436 if (status != U_BUFFER_OVERFLOW_ERROR) | 459 if (status != U_BUFFER_OVERFLOW_ERROR || info.errors != 0) |
437 break; | 460 break; |
438 } | 461 } |
439 // Failed, revert back to original string. | 462 // Failed, revert back to original string. |
440 out->resize(original_length); | 463 out->resize(original_length); |
441 } | 464 } |
442 | 465 |
443 // We get here with no IDN or on error, in which case we just append the | 466 // We get here with no IDN or on error, in which case we just append the |
444 // literal input. | 467 // literal input. |
445 out->append(comp, comp_len); | 468 out->append(comp, comp_len); |
446 return false; | 469 return false; |
(...skipping 1700 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2147 | 2170 |
2148 NetworkInterface::NetworkInterface(const std::string& name, | 2171 NetworkInterface::NetworkInterface(const std::string& name, |
2149 const IPAddressNumber& address) | 2172 const IPAddressNumber& address) |
2150 : name(name), address(address) { | 2173 : name(name), address(address) { |
2151 } | 2174 } |
2152 | 2175 |
2153 NetworkInterface::~NetworkInterface() { | 2176 NetworkInterface::~NetworkInterface() { |
2154 } | 2177 } |
2155 | 2178 |
2156 } // namespace net | 2179 } // namespace net |
OLD | NEW |