Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(157)

Side by Side Diff: net/base/net_util.cc

Issue 23642003: Support IDNA 2008 (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: Created 7 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « chrome/common/net/x509_certificate_model.cc ('k') | url/url_canon_icu.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "net/base/net_util.h" 5 #include "net/base/net_util.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <iterator> 8 #include <iterator>
9 #include <map> 9 #include <map>
10 10
(...skipping 306 matching lines...) Expand 10 before | Expand all | Expand 10 after
317 // (e.g. Characters like U+00A0, U+3000, U+3002 are omitted because 317 // (e.g. Characters like U+00A0, U+3000, U+3002 are omitted because
318 // they're gonna be canonicalized to U+0020 and full stop before 318 // they're gonna be canonicalized to U+0020 and full stop before
319 // reaching here.) 319 // reaching here.)
320 // The original list is available at 320 // The original list is available at
321 // http://kb.mozillazine.org/Network.IDN.blacklist_chars and 321 // http://kb.mozillazine.org/Network.IDN.blacklist_chars and
322 // at http://mxr.mozilla.org/seamonkey/source/modules/libpref/src/init/all.js#703 322 // at http://mxr.mozilla.org/seamonkey/source/modules/libpref/src/init/all.js#703
323 323
324 UErrorCode status = U_ZERO_ERROR; 324 UErrorCode status = U_ZERO_ERROR;
325 #ifdef U_WCHAR_IS_UTF16 325 #ifdef U_WCHAR_IS_UTF16
326 icu::UnicodeSet dangerous_characters(icu::UnicodeString( 326 icu::UnicodeSet dangerous_characters(icu::UnicodeString(
327 L"[[\\ \u00bc\u00bd\u01c3\u0337\u0338" 327 L"[[\\ \u00ad\u00bc\u00bd\u01c3\u0337\u0338"
328 L"\u05c3\u05f4\u06d4\u0702\u115f\u1160][\u2000-\u200b]" 328 L"\u05c3\u05f4\u06d4\u0702\u115f\u1160][\u2000-\u200b]"
329 L"[\u2024\u2027\u2028\u2029\u2039\u203a\u2044\u205f]" 329 L"[\u2024\u2027\u2028\u2029\u2039\u203a\u2044\u205f]"
330 L"[\u2154-\u2156][\u2159-\u215b][\u215f\u2215\u23ae" 330 L"[\u2154-\u2156][\u2159-\u215b][\u215f\u2215\u23ae"
331 L"\u29f6\u29f8\u2afb\u2afd][\u2ff0-\u2ffb][\u3014" 331 L"\u29f6\u29f8\u2afb\u2afd][\u2ff0-\u2ffb][\u3014"
332 L"\u3015\u3033\u3164\u321d\u321e\u33ae\u33af\u33c6\u33df\ufe14" 332 L"\u3015\u3033\u3164\u321d\u321e\u33ae\u33af\u33c6\u33df\ufe14"
333 L"\ufe15\ufe3f\ufe5d\ufe5e\ufeff\uff0e\uff06\uff61\uffa0\ufff9]" 333 L"\ufe15\ufe3f\ufe5d\ufe5e\ufeff\uff0e\uff06\uff61\uffa0\ufff9]"
334 L"[\ufffa-\ufffd]]"), status); 334 L"[\ufffa-\ufffd]]"), status);
335 DCHECK(U_SUCCESS(status)); 335 DCHECK(U_SUCCESS(status));
336 icu::RegexMatcher dangerous_patterns(icu::UnicodeString( 336 icu::RegexMatcher dangerous_patterns(icu::UnicodeString(
337 // Lone katakana no, so, or n 337 // Lone katakana no, so, or n
338 L"[^\\p{Katakana}][\u30ce\u30f3\u30bd][^\\p{Katakana}]" 338 L"[^\\p{Katakana}][\u30ce\u30f3\u30bd][^\\p{Katakana}]"
339 // Repeating Japanese accent characters 339 // Repeating Japanese accent characters
340 L"|[\u3099\u309a\u309b\u309c][\u3099\u309a\u309b\u309c]"), 340 L"|[\u3099\u309a\u309b\u309c][\u3099\u309a\u309b\u309c]"),
341 0, status); 341 0, status);
342 #else 342 #else
343 icu::UnicodeSet dangerous_characters(icu::UnicodeString( 343 icu::UnicodeSet dangerous_characters(icu::UnicodeString(
344 "[[\\u0020\\u00bc\\u00bd\\u01c3\\u0337\\u0338" 344 "[[\\u0020\\u00ad\\u00bc\\u00bd\\u01c3\\u0337\\u0338"
345 "\\u05c3\\u05f4\\u06d4\\u0702\\u115f\\u1160][\\u2000-\\u200b]" 345 "\\u05c3\\u05f4\\u06d4\\u0702\\u115f\\u1160][\\u2000-\\u200b]"
346 "[\\u2024\\u2027\\u2028\\u2029\\u2039\\u203a\\u2044\\u205f]" 346 "[\\u2024\\u2027\\u2028\\u2029\\u2039\\u203a\\u2044\\u205f]"
347 "[\\u2154-\\u2156][\\u2159-\\u215b][\\u215f\\u2215\\u23ae" 347 "[\\u2154-\\u2156][\\u2159-\\u215b][\\u215f\\u2215\\u23ae"
348 "\\u29f6\\u29f8\\u2afb\\u2afd][\\u2ff0-\\u2ffb][\\u3014" 348 "\\u29f6\\u29f8\\u2afb\\u2afd][\\u2ff0-\\u2ffb][\\u3014"
349 "\\u3015\\u3033\\u3164\\u321d\\u321e\\u33ae\\u33af\\u33c6\\u33df\\ufe14" 349 "\\u3015\\u3033\\u3164\\u321d\\u321e\\u33ae\\u33af\\u33c6\\u33df\\ufe14"
350 "\\ufe15\\ufe3f\\ufe5d\\ufe5e\\ufeff\\uff0e\\uff06\\uff61\\uffa0\\ufff9]" 350 "\\ufe15\\ufe3f\\ufe5d\\ufe5e\\ufeff\\uff0e\\uff06\\uff61\\uffa0\\ufff9]"
351 "[\\ufffa-\\ufffd]]", -1, US_INV), status); 351 "[\\ufffa-\\ufffd]]", -1, US_INV), status);
352 DCHECK(U_SUCCESS(status)); 352 DCHECK(U_SUCCESS(status));
353 icu::RegexMatcher dangerous_patterns(icu::UnicodeString( 353 icu::RegexMatcher dangerous_patterns(icu::UnicodeString(
354 // Lone katakana no, so, or n 354 // Lone katakana no, so, or n
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
391 component_characters.removeAll(common_characters); 391 component_characters.removeAll(common_characters);
392 392
393 base::StringTokenizer t(languages, ","); 393 base::StringTokenizer t(languages, ",");
394 while (t.GetNext()) { 394 while (t.GetNext()) {
395 if (IsComponentCoveredByLang(component_characters, t.token())) 395 if (IsComponentCoveredByLang(component_characters, t.token()))
396 return true; 396 return true;
397 } 397 }
398 return false; 398 return false;
399 } 399 }
400 400
401 // A wrapper to use LazyInstance<>::Leaky with ICU's UIDNA, a C pointer to
402 // UTS46/IDNA 2008 handling object opened with |uidna_openUTS46|.
Peter Kasting 2013/09/23 21:12:44 Nit: Function names don't go inside pipes; instead
403 //
404 // We use UTS46 with BiDiCheck to migrate to IDNA 2008 with
405 // backward compatibility in mind. What it does:
Peter Kasting 2013/09/23 21:12:44 Migrate from what? Backward compatibility with wh
406 //
407 // 1. Use the up-to-date Unicode data.
408 // 2. Define a case folding/mapping with the up-to-date Unicode data as
409 // in IDNA 2003.
410 // 3. Use transitional mechanism for 4 deviation characters (sharp-s,
411 // final sigma, ZWJ and ZWNJ) for now.
412 // 4. Continue to allow symbols and punctuation.
413 // 5. Apply new BiDi check rules more permissive than the IDNA 2003 BiDi rules.
414 // 6. Do not apply STD3 rules.
415 // 7. Do not allow unassigned code points.
416 //
417 // It also closely matches what IE 10 does except for the BiDi check (
418 // http://goo.gl/3XBhqw ).
419 // See http://unicode.org/reports/tr46/ and references therein
420 // for more details.
421 struct UIDNAWrapper {
422 UIDNAWrapper() {
423 UErrorCode err = U_ZERO_ERROR;
424 // TODO(jungshik): Change options as different parties (browsers,
425 // registrars, search engines) converge toward a consensus.
426 value = uidna_openUTS46(UIDNA_CHECK_BIDI, &err);
427 if (U_FAILURE(err))
428 value = NULL;
429 }
430
431 UIDNA* value;
432 };
433
434 static base::LazyInstance<UIDNAWrapper>::Leaky
435 g_uidna = LAZY_INSTANCE_INITIALIZER;
436
401 // Converts one component of a host (between dots) to IDN if safe. The result 437 // Converts one component of a host (between dots) to IDN if safe. The result
402 // will be APPENDED to the given output string and will be the same as the input 438 // will be APPENDED to the given output string and will be the same as the input
403 // if it is not IDN or the IDN is unsafe to display. Returns whether any 439 // if it is not IDN or the IDN is unsafe to display. Returns whether any
404 // conversion was performed. 440 // conversion was performed.
405 bool IDNToUnicodeOneComponent(const base::char16* comp, 441 bool IDNToUnicodeOneComponent(const base::char16* comp,
406 size_t comp_len, 442 size_t comp_len,
407 const std::string& languages, 443 const std::string& languages,
408 base::string16* out) { 444 base::string16* out) {
409 DCHECK(out); 445 DCHECK(out);
410 if (comp_len == 0) 446 if (comp_len == 0)
411 return false; 447 return false;
412 448
413 // Only transform if the input can be an IDN component. 449 // Only transform if the input can be an IDN component.
414 static const base::char16 kIdnPrefix[] = {'x', 'n', '-', '-'}; 450 static const base::char16 kIdnPrefix[] = {'x', 'n', '-', '-'};
415 if ((comp_len > arraysize(kIdnPrefix)) && 451 if ((comp_len > arraysize(kIdnPrefix)) &&
416 !memcmp(comp, kIdnPrefix, arraysize(kIdnPrefix) * sizeof(base::char16))) { 452 !memcmp(comp, kIdnPrefix, arraysize(kIdnPrefix) * sizeof(base::char16))) {
417 // Repeatedly expand the output string until it's big enough. It looks like 453 UIDNA* uidna = g_uidna.Get().value;
418 // ICU will return the required size of the buffer, but that's not 454 DCHECK(uidna != NULL);
419 // documented, so we'll just grow by 2x. This should be rare and is not on a
420 // critical path.
421 size_t original_length = out->length(); 455 size_t original_length = out->length();
422 for (int extra_space = 64; ; extra_space *= 2) { 456 int output_length = 64;
423 UErrorCode status = U_ZERO_ERROR; 457 UIDNAInfo info = UIDNA_INFO_INITIALIZER;
424 out->resize(out->length() + extra_space); 458 UErrorCode status;
425 int output_chars = uidna_IDNToUnicode(comp, 459 do {
426 static_cast<int32_t>(comp_len), &(*out)[original_length], extra_space, 460 out->resize(original_length + output_length);
427 UIDNA_DEFAULT, NULL, &status); 461 status = U_ZERO_ERROR;
428 if (status == U_ZERO_ERROR) { 462 // This returns the actual length required. If this is more than 64
429 // Converted successfully. 463 // code units, |status| will be U_BUFFER_OVERFLOW_ERROR and we'll try
430 out->resize(original_length + output_chars); 464 // the conversion again, but with a sufficiently large buffer.
431 if (IsIDNComponentSafe(out->data() + original_length, output_chars, 465 output_length = uidna_labelToUnicode(
432 languages)) 466 uidna, comp, static_cast<int32_t>(comp_len), &(*out)[original_length],
433 return true; 467 output_length, &info, &status);
434 } 468 } while ((status == U_BUFFER_OVERFLOW_ERROR && info.errors == 0));
435 469
436 if (status != U_BUFFER_OVERFLOW_ERROR) 470 if (U_SUCCESS(status) && info.errors == 0) {
437 break; 471 // Converted successfully. Ensure that the converted component
472 // can be safely displayed to the user.
473 out->resize(original_length + output_length);
474 if (IsIDNComponentSafe(out->data() + original_length, output_length,
475 languages))
476 return true;
438 } 477 }
439 // Failed, revert back to original string. 478
479 // Something went wrong. Revert to original string.
440 out->resize(original_length); 480 out->resize(original_length);
441 } 481 }
442 482
443 // We get here with no IDN or on error, in which case we just append the 483 // We get here with no IDN or on error, in which case we just append the
444 // literal input. 484 // literal input.
445 out->append(comp, comp_len); 485 out->append(comp, comp_len);
446 return false; 486 return false;
447 } 487 }
448 488
449 // Clamps the offsets in |offsets_for_adjustment| to the length of |str|. 489 // Clamps the offsets in |offsets_for_adjustment| to the length of |str|.
(...skipping 1697 matching lines...) Expand 10 before | Expand all | Expand 10 after
2147 2187
2148 NetworkInterface::NetworkInterface(const std::string& name, 2188 NetworkInterface::NetworkInterface(const std::string& name,
2149 const IPAddressNumber& address) 2189 const IPAddressNumber& address)
2150 : name(name), address(address) { 2190 : name(name), address(address) {
2151 } 2191 }
2152 2192
2153 NetworkInterface::~NetworkInterface() { 2193 NetworkInterface::~NetworkInterface() {
2154 } 2194 }
2155 2195
2156 } // namespace net 2196 } // namespace net
OLDNEW
« no previous file with comments | « chrome/common/net/x509_certificate_model.cc ('k') | url/url_canon_icu.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698