Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(681)

Side by Side Diff: net/base/net_util.cc

Issue 23642003: Support IDNA 2008 (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: Created 7 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "net/base/net_util.h" 5 #include "net/base/net_util.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <iterator> 8 #include <iterator>
9 #include <map> 9 #include <map>
10 10
(...skipping 304 matching lines...) Expand 10 before | Expand all | Expand 10 after
315 // (e.g. Characters like U+00A0, U+3000, U+3002 are omitted because 315 // (e.g. Characters like U+00A0, U+3000, U+3002 are omitted because
316 // they're gonna be canonicalized to U+0020 and full stop before 316 // they're gonna be canonicalized to U+0020 and full stop before
317 // reaching here.) 317 // reaching here.)
318 // The original list is available at 318 // The original list is available at
319 // http://kb.mozillazine.org/Network.IDN.blacklist_chars and 319 // http://kb.mozillazine.org/Network.IDN.blacklist_chars and
320 // at http://mxr.mozilla.org/seamonkey/source/modules/libpref/src/init/all.js#703 320 // at http://mxr.mozilla.org/seamonkey/source/modules/libpref/src/init/all.js#703
321 321
322 UErrorCode status = U_ZERO_ERROR; 322 UErrorCode status = U_ZERO_ERROR;
323 #ifdef U_WCHAR_IS_UTF16 323 #ifdef U_WCHAR_IS_UTF16
324 icu::UnicodeSet dangerous_characters(icu::UnicodeString( 324 icu::UnicodeSet dangerous_characters(icu::UnicodeString(
325 L"[[\\ \u00bc\u00bd\u01c3\u0337\u0338" 325 L"[[\\ \u00ad\u00bc\u00bd\u01c3\u0337\u0338"
326 L"\u05c3\u05f4\u06d4\u0702\u115f\u1160][\u2000-\u200b]" 326 L"\u05c3\u05f4\u06d4\u0702\u115f\u1160][\u2000-\u200b]"
327 L"[\u2024\u2027\u2028\u2029\u2039\u203a\u2044\u205f]" 327 L"[\u2024\u2027\u2028\u2029\u2039\u203a\u2044\u205f]"
328 L"[\u2154-\u2156][\u2159-\u215b][\u215f\u2215\u23ae" 328 L"[\u2154-\u2156][\u2159-\u215b][\u215f\u2215\u23ae"
329 L"\u29f6\u29f8\u2afb\u2afd][\u2ff0-\u2ffb][\u3014" 329 L"\u29f6\u29f8\u2afb\u2afd][\u2ff0-\u2ffb][\u3014"
330 L"\u3015\u3033\u3164\u321d\u321e\u33ae\u33af\u33c6\u33df\ufe14" 330 L"\u3015\u3033\u3164\u321d\u321e\u33ae\u33af\u33c6\u33df\ufe14"
331 L"\ufe15\ufe3f\ufe5d\ufe5e\ufeff\uff0e\uff06\uff61\uffa0\ufff9]" 331 L"\ufe15\ufe3f\ufe5d\ufe5e\ufeff\uff0e\uff06\uff61\uffa0\ufff9]"
332 L"[\ufffa-\ufffd]]"), status); 332 L"[\ufffa-\ufffd]]"), status);
333 DCHECK(U_SUCCESS(status)); 333 DCHECK(U_SUCCESS(status));
334 icu::RegexMatcher dangerous_patterns(icu::UnicodeString( 334 icu::RegexMatcher dangerous_patterns(icu::UnicodeString(
335 // Lone katakana no, so, or n 335 // Lone katakana no, so, or n
336 L"[^\\p{Katakana}][\u30ce\u30f3\u30bd][^\\p{Katakana}]" 336 L"[^\\p{Katakana}][\u30ce\u30f3\u30bd][^\\p{Katakana}]"
337 // Repeating Japanese accent characters 337 // Repeating Japanese accent characters
338 L"|[\u3099\u309a\u309b\u309c][\u3099\u309a\u309b\u309c]"), 338 L"|[\u3099\u309a\u309b\u309c][\u3099\u309a\u309b\u309c]"),
339 0, status); 339 0, status);
340 #else 340 #else
341 icu::UnicodeSet dangerous_characters(icu::UnicodeString( 341 icu::UnicodeSet dangerous_characters(icu::UnicodeString(
342 "[[\\u0020\\u00bc\\u00bd\\u01c3\\u0337\\u0338" 342 "[[\\u0020\\u00ad\\u00bc\\u00bd\\u01c3\\u0337\\u0338"
343 "\\u05c3\\u05f4\\u06d4\\u0702\\u115f\\u1160][\\u2000-\\u200b]" 343 "\\u05c3\\u05f4\\u06d4\\u0702\\u115f\\u1160][\\u2000-\\u200b]"
344 "[\\u2024\\u2027\\u2028\\u2029\\u2039\\u203a\\u2044\\u205f]" 344 "[\\u2024\\u2027\\u2028\\u2029\\u2039\\u203a\\u2044\\u205f]"
345 "[\\u2154-\\u2156][\\u2159-\\u215b][\\u215f\\u2215\\u23ae" 345 "[\\u2154-\\u2156][\\u2159-\\u215b][\\u215f\\u2215\\u23ae"
346 "\\u29f6\\u29f8\\u2afb\\u2afd][\\u2ff0-\\u2ffb][\\u3014" 346 "\\u29f6\\u29f8\\u2afb\\u2afd][\\u2ff0-\\u2ffb][\\u3014"
347 "\\u3015\\u3033\\u3164\\u321d\\u321e\\u33ae\\u33af\\u33c6\\u33df\\ufe14" 347 "\\u3015\\u3033\\u3164\\u321d\\u321e\\u33ae\\u33af\\u33c6\\u33df\\ufe14"
348 "\\ufe15\\ufe3f\\ufe5d\\ufe5e\\ufeff\\uff0e\\uff06\\uff61\\uffa0\\ufff9]" 348 "\\ufe15\\ufe3f\\ufe5d\\ufe5e\\ufeff\\uff0e\\uff06\\uff61\\uffa0\\ufff9]"
349 "[\\ufffa-\\ufffd]]", -1, US_INV), status); 349 "[\\ufffa-\\ufffd]]", -1, US_INV), status);
350 DCHECK(U_SUCCESS(status)); 350 DCHECK(U_SUCCESS(status));
351 icu::RegexMatcher dangerous_patterns(icu::UnicodeString( 351 icu::RegexMatcher dangerous_patterns(icu::UnicodeString(
352 // Lone katakana no, so, or n 352 // Lone katakana no, so, or n
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
389 component_characters.removeAll(common_characters); 389 component_characters.removeAll(common_characters);
390 390
391 base::StringTokenizer t(languages, ","); 391 base::StringTokenizer t(languages, ",");
392 while (t.GetNext()) { 392 while (t.GetNext()) {
393 if (IsComponentCoveredByLang(component_characters, t.token())) 393 if (IsComponentCoveredByLang(component_characters, t.token()))
394 return true; 394 return true;
395 } 395 }
396 return false; 396 return false;
397 } 397 }
398 398
399 static base::LazyInstance<base::Lock>::Leaky
400 g_uidna_lock = LAZY_INSTANCE_INITIALIZER;
401
399 // Converts one component of a host (between dots) to IDN if safe. The result 402 // Converts one component of a host (between dots) to IDN if safe. The result
400 // will be APPENDED to the given output string and will be the same as the input 403 // will be APPENDED to the given output string and will be the same as the input
401 // if it is not IDN or the IDN is unsafe to display. Returns whether any 404 // if it is not IDN or the IDN is unsafe to display. Returns whether any
402 // conversion was performed. 405 // conversion was performed.
403 bool IDNToUnicodeOneComponent(const base::char16* comp, 406 bool IDNToUnicodeOneComponent(const base::char16* comp,
404 size_t comp_len, 407 size_t comp_len,
405 const std::string& languages, 408 const std::string& languages,
406 base::string16* out) { 409 base::string16* out) {
407 DCHECK(out); 410 DCHECK(out);
408 if (comp_len == 0) 411 if (comp_len == 0)
409 return false; 412 return false;
410 413
414
411 // Only transform if the input can be an IDN component. 415 // Only transform if the input can be an IDN component.
412 static const base::char16 kIdnPrefix[] = {'x', 'n', '-', '-'}; 416 static const base::char16 kIdnPrefix[] = {'x', 'n', '-', '-'};
413 if ((comp_len > arraysize(kIdnPrefix)) && 417 if ((comp_len > arraysize(kIdnPrefix)) &&
414 !memcmp(comp, kIdnPrefix, arraysize(kIdnPrefix) * sizeof(base::char16))) { 418 !memcmp(comp, kIdnPrefix, arraysize(kIdnPrefix) * sizeof(base::char16))) {
415 // Repeatedly expand the output string until it's big enough. It looks like 419 static UIDNA* uidna = NULL; // will be leaked.
416 // ICU will return the required size of the buffer, but that's not 420 {
417 // documented, so we'll just grow by 2x. This should be rare and is not on a 421 UErrorCode err = U_ZERO_ERROR;
418 // critical path. 422 base::AutoLock lock(g_uidna_lock.Get());
423 if (uidna == NULL) {
424 // This is the option closest to what we had in the past with IDNA 2003
425 // API and matches what IE 10 does except for BiDi check.
426 // IDNA 2003 always checks BiDi. We used to allow unassigned code
427 // points. However, with our Unicode DB pretty up to date, we'd not
428 // need to turn this on.
429 // We didn't use STD3 rules and we continue not to.
430 // TODO(jungshik) : Review and change options as different
431 // parties (browsers, registrars, search engines) converge toward
432 // a consensus.
433 int32_t options = UIDNA_CHECK_BIDI;
434 uidna = uidna_openUTS46(options, &err);
435 if (U_FAILURE(err))
436 return false;
437 }
438 }
419 size_t original_length = out->length(); 439 size_t original_length = out->length();
420 for (int extra_space = 64; ; extra_space *= 2) { 440 for (int output_length = 64; ; ) {
421 UErrorCode status = U_ZERO_ERROR; 441 UErrorCode status = U_ZERO_ERROR;
422 out->resize(out->length() + extra_space); 442 UIDNAInfo info = UIDNA_INFO_INITIALIZER;
423 int output_chars = uidna_IDNToUnicode(comp, 443 out->resize(original_length + output_length);
424 static_cast<int32_t>(comp_len), &(*out)[original_length], extra_space, 444 output_length = uidna_labelToUnicode(uidna, comp,
425 UIDNA_DEFAULT, NULL, &status); 445 static_cast<int32_t>(comp_len), &(*out)[original_length],
426 if (status == U_ZERO_ERROR) { 446 output_length, &info, &status);
447 if (U_SUCCESS(status) && info.errors == 0) {
427 // Converted successfully. 448 // Converted successfully.
428 out->resize(original_length + output_chars); 449 out->resize(original_length + output_length);
429 if (IsIDNComponentSafe(out->data() + original_length, output_chars, 450 if (IsIDNComponentSafe(out->data() + original_length, output_length,
430 languages)) 451 languages))
431 return true; 452 return true;
432 } 453 }
433 454
434 if (status != U_BUFFER_OVERFLOW_ERROR) 455 if (status != U_BUFFER_OVERFLOW_ERROR || info.errors != 0)
435 break; 456 break;
436 } 457 }
437 // Failed, revert back to original string. 458 // Failed, revert back to original string.
438 out->resize(original_length); 459 out->resize(original_length);
439 } 460 }
440 461
441 // We get here with no IDN or on error, in which case we just append the 462 // We get here with no IDN or on error, in which case we just append the
442 // literal input. 463 // literal input.
443 out->append(comp, comp_len); 464 out->append(comp, comp_len);
444 return false; 465 return false;
(...skipping 1636 matching lines...) Expand 10 before | Expand all | Expand 10 after
2081 2102
2082 NetworkInterface::NetworkInterface(const std::string& name, 2103 NetworkInterface::NetworkInterface(const std::string& name,
2083 const IPAddressNumber& address) 2104 const IPAddressNumber& address)
2084 : name(name), address(address) { 2105 : name(name), address(address) {
2085 } 2106 }
2086 2107
2087 NetworkInterface::~NetworkInterface() { 2108 NetworkInterface::~NetworkInterface() {
2088 } 2109 }
2089 2110
2090 } // namespace net 2111 } // namespace net
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698