Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(20)

Side by Side Diff: net/base/net_util.cc

Issue 23642003: Support IDNA 2008 (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: Created 7 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLD | NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "net/base/net_util.h" 5 #include "net/base/net_util.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <iterator> 8 #include <iterator>
9 #include <map> 9 #include <map>
10 10
(...skipping 306 matching lines...) Expand 10 before | Expand all | Expand 10 after
317 // (e.g. Characters like U+00A0, U+3000, U+3002 are omitted because 317 // (e.g. Characters like U+00A0, U+3000, U+3002 are omitted because
318 // they're gonna be canonicalized to U+0020 and full stop before 318 // they're gonna be canonicalized to U+0020 and full stop before
319 // reaching here.) 319 // reaching here.)
320 // The original list is available at 320 // The original list is available at
321 // http://kb.mozillazine.org/Network.IDN.blacklist_chars and 321 // http://kb.mozillazine.org/Network.IDN.blacklist_chars and
322 // at http://mxr.mozilla.org/seamonkey/source/modules/libpref/src/init/all.js#703 322 // at http://mxr.mozilla.org/seamonkey/source/modules/libpref/src/init/all.js#703
323 323
324 UErrorCode status = U_ZERO_ERROR; 324 UErrorCode status = U_ZERO_ERROR;
325 #ifdef U_WCHAR_IS_UTF16 325 #ifdef U_WCHAR_IS_UTF16
326 icu::UnicodeSet dangerous_characters(icu::UnicodeString( 326 icu::UnicodeSet dangerous_characters(icu::UnicodeString(
327 L"[[\\ \u00bc\u00bd\u01c3\u0337\u0338" 327 L"[[\\ \u00ad\u00bc\u00bd\u01c3\u0337\u0338"
328 L"\u05c3\u05f4\u06d4\u0702\u115f\u1160][\u2000-\u200b]" 328 L"\u05c3\u05f4\u06d4\u0702\u115f\u1160][\u2000-\u200b]"
329 L"[\u2024\u2027\u2028\u2029\u2039\u203a\u2044\u205f]" 329 L"[\u2024\u2027\u2028\u2029\u2039\u203a\u2044\u205f]"
330 L"[\u2154-\u2156][\u2159-\u215b][\u215f\u2215\u23ae" 330 L"[\u2154-\u2156][\u2159-\u215b][\u215f\u2215\u23ae"
331 L"\u29f6\u29f8\u2afb\u2afd][\u2ff0-\u2ffb][\u3014" 331 L"\u29f6\u29f8\u2afb\u2afd][\u2ff0-\u2ffb][\u3014"
332 L"\u3015\u3033\u3164\u321d\u321e\u33ae\u33af\u33c6\u33df\ufe14" 332 L"\u3015\u3033\u3164\u321d\u321e\u33ae\u33af\u33c6\u33df\ufe14"
333 L"\ufe15\ufe3f\ufe5d\ufe5e\ufeff\uff0e\uff06\uff61\uffa0\ufff9]" 333 L"\ufe15\ufe3f\ufe5d\ufe5e\ufeff\uff0e\uff06\uff61\uffa0\ufff9]"
334 L"[\ufffa-\ufffd]]"), status); 334 L"[\ufffa-\ufffd]]"), status);
335 DCHECK(U_SUCCESS(status)); 335 DCHECK(U_SUCCESS(status));
336 icu::RegexMatcher dangerous_patterns(icu::UnicodeString( 336 icu::RegexMatcher dangerous_patterns(icu::UnicodeString(
337 // Lone katakana no, so, or n 337 // Lone katakana no, so, or n
338 L"[^\\p{Katakana}][\u30ce\u30f3\u30bd][^\\p{Katakana}]" 338 L"[^\\p{Katakana}][\u30ce\u30f3\u30bd][^\\p{Katakana}]"
339 // Repeating Japanese accent characters 339 // Repeating Japanese accent characters
340 L"|[\u3099\u309a\u309b\u309c][\u3099\u309a\u309b\u309c]"), 340 L"|[\u3099\u309a\u309b\u309c][\u3099\u309a\u309b\u309c]"),
341 0, status); 341 0, status);
342 #else 342 #else
343 icu::UnicodeSet dangerous_characters(icu::UnicodeString( 343 icu::UnicodeSet dangerous_characters(icu::UnicodeString(
344 "[[\\u0020\\u00bc\\u00bd\\u01c3\\u0337\\u0338" 344 "[[\\u0020\\u00ad\\u00bc\\u00bd\\u01c3\\u0337\\u0338"
345 "\\u05c3\\u05f4\\u06d4\\u0702\\u115f\\u1160][\\u2000-\\u200b]" 345 "\\u05c3\\u05f4\\u06d4\\u0702\\u115f\\u1160][\\u2000-\\u200b]"
346 "[\\u2024\\u2027\\u2028\\u2029\\u2039\\u203a\\u2044\\u205f]" 346 "[\\u2024\\u2027\\u2028\\u2029\\u2039\\u203a\\u2044\\u205f]"
347 "[\\u2154-\\u2156][\\u2159-\\u215b][\\u215f\\u2215\\u23ae" 347 "[\\u2154-\\u2156][\\u2159-\\u215b][\\u215f\\u2215\\u23ae"
348 "\\u29f6\\u29f8\\u2afb\\u2afd][\\u2ff0-\\u2ffb][\\u3014" 348 "\\u29f6\\u29f8\\u2afb\\u2afd][\\u2ff0-\\u2ffb][\\u3014"
349 "\\u3015\\u3033\\u3164\\u321d\\u321e\\u33ae\\u33af\\u33c6\\u33df\\ufe14" 349 "\\u3015\\u3033\\u3164\\u321d\\u321e\\u33ae\\u33af\\u33c6\\u33df\\ufe14"
350 "\\ufe15\\ufe3f\\ufe5d\\ufe5e\\ufeff\\uff0e\\uff06\\uff61\\uffa0\\ufff9]" 350 "\\ufe15\\ufe3f\\ufe5d\\ufe5e\\ufeff\\uff0e\\uff06\\uff61\\uffa0\\ufff9]"
351 "[\\ufffa-\\ufffd]]", -1, US_INV), status); 351 "[\\ufffa-\\ufffd]]", -1, US_INV), status);
352 DCHECK(U_SUCCESS(status)); 352 DCHECK(U_SUCCESS(status));
353 icu::RegexMatcher dangerous_patterns(icu::UnicodeString( 353 icu::RegexMatcher dangerous_patterns(icu::UnicodeString(
354 // Lone katakana no, so, or n 354 // Lone katakana no, so, or n
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
391 component_characters.removeAll(common_characters); 391 component_characters.removeAll(common_characters);
392 392
393 base::StringTokenizer t(languages, ","); 393 base::StringTokenizer t(languages, ",");
394 while (t.GetNext()) { 394 while (t.GetNext()) {
395 if (IsComponentCoveredByLang(component_characters, t.token())) 395 if (IsComponentCoveredByLang(component_characters, t.token()))
396 return true; 396 return true;
397 } 397 }
398 return false; 398 return false;
399 } 399 }
400 400
401 struct uidna_wrapper {
Peter Kasting 2013/09/19 20:55:36 Struct names should be CamelCase.
jungshik at Google 2013/09/20 21:33:41 Done.
402 uidna_wrapper() {
403 UErrorCode err = U_ZERO_ERROR;
404 // This is the option closest to what we had in the past with IDNA 2003
Peter Kasting 2013/09/19 20:55:36 Never write in a comment about what "used to" happen [… comment preview truncated]
jungshik at Google 2013/09/20 21:33:41 Done. I just kept the TODO comment here and explained [… comment preview truncated]
405 // API and matches what IE 10 does except for BiDi check.
406 // IDNA 2003 always checks BiDi. We used to allow unassigned code
407 // points. However, with our Unicode DB pretty up to date, we'd not
408 // need to turn this on.
409 // We didn't use STD3 rules and we continue not to.
410 // TODO(jungshik) : Change options as different parties (browsers,
411 // registrars, search engines) converge toward a consensus.
412 int32_t options = UIDNA_CHECK_BIDI;
Peter Kasting 2013/09/19 20:55:36 Nit: Don't make a temp for this, just inline it.
jungshik at Google 2013/09/20 21:33:41 Done.
413 value = uidna_openUTS46(options, &err);
414 if (U_FAILURE(err))
415 value = NULL;
416 }
417
418 UIDNA* value;
419 };
420
421 static base::LazyInstance<uidna_wrapper>::Leaky
422 g_uidna = LAZY_INSTANCE_INITIALIZER;
423
401 // Converts one component of a host (between dots) to IDN if safe. The result 424 // Converts one component of a host (between dots) to IDN if safe. The result
402 // will be APPENDED to the given output string and will be the same as the input 425 // will be APPENDED to the given output string and will be the same as the input
403 // if it is not IDN or the IDN is unsafe to display. Returns whether any 426 // if it is not IDN or the IDN is unsafe to display. Returns whether any
404 // conversion was performed. 427 // conversion was performed.
405 bool IDNToUnicodeOneComponent(const base::char16* comp, 428 bool IDNToUnicodeOneComponent(const base::char16* comp,
406 size_t comp_len, 429 size_t comp_len,
407 const std::string& languages, 430 const std::string& languages,
408 base::string16* out) { 431 base::string16* out) {
409 DCHECK(out); 432 DCHECK(out);
410 if (comp_len == 0) 433 if (comp_len == 0)
411 return false; 434 return false;
412 435
413 // Only transform if the input can be an IDN component. 436 // Only transform if the input can be an IDN component.
414 static const base::char16 kIdnPrefix[] = {'x', 'n', '-', '-'}; 437 static const base::char16 kIdnPrefix[] = {'x', 'n', '-', '-'};
415 if ((comp_len > arraysize(kIdnPrefix)) && 438 if ((comp_len > arraysize(kIdnPrefix)) &&
416 !memcmp(comp, kIdnPrefix, arraysize(kIdnPrefix) * sizeof(base::char16))) { 439 !memcmp(comp, kIdnPrefix, arraysize(kIdnPrefix) * sizeof(base::char16))) {
417 // Repeatedly expand the output string until it's big enough. It looks like 440
418 // ICU will return the required size of the buffer, but that's not 441 UIDNA* uidna = g_uidna.Get().value;
419 // documented, so we'll just grow by 2x. This should be rare and is not on a 442 DCHECK(uidna != NULL);
420 // critical path.
421 size_t original_length = out->length(); 443 size_t original_length = out->length();
422 for (int extra_space = 64; ; extra_space *= 2) { 444 for (int output_length = 64; ; ) {
Peter Kasting 2013/09/19 20:55:36 How about writing the loop this way: // Try t[… comment preview truncated]
jungshik at Google 2013/09/20 21:33:41 Done with a slight change because UErrorCode has t[… comment preview truncated]
423 UErrorCode status = U_ZERO_ERROR; 445 UErrorCode status = U_ZERO_ERROR;
424 out->resize(out->length() + extra_space); 446 UIDNAInfo info = UIDNA_INFO_INITIALIZER;
425 int output_chars = uidna_IDNToUnicode(comp, 447 out->resize(original_length + output_length);
426 static_cast<int32_t>(comp_len), &(*out)[original_length], extra_space, 448 output_length = uidna_labelToUnicode(uidna, comp,
427 UIDNA_DEFAULT, NULL, &status); 449 static_cast<int32_t>(comp_len), &(*out)[original_length],
428 if (status == U_ZERO_ERROR) { 450 output_length, &info, &status);
451 if (U_SUCCESS(status) && info.errors == 0) {
429 // Converted successfully. 452 // Converted successfully.
430 out->resize(original_length + output_chars); 453 out->resize(original_length + output_length);
431 if (IsIDNComponentSafe(out->data() + original_length, output_chars, 454 if (IsIDNComponentSafe(out->data() + original_length, output_length,
432 languages)) 455 languages))
433 return true; 456 return true;
434 } 457 }
435 458
436 if (status != U_BUFFER_OVERFLOW_ERROR) 459 if (status != U_BUFFER_OVERFLOW_ERROR || info.errors != 0)
437 break; 460 break;
438 } 461 }
439 // Failed, revert back to original string. 462 // Failed, revert back to original string.
440 out->resize(original_length); 463 out->resize(original_length);
441 } 464 }
442 465
443 // We get here with no IDN or on error, in which case we just append the 466 // We get here with no IDN or on error, in which case we just append the
444 // literal input. 467 // literal input.
445 out->append(comp, comp_len); 468 out->append(comp, comp_len);
446 return false; 469 return false;
(...skipping 1700 matching lines...) Expand 10 before | Expand all | Expand 10 after
2147 2170
2148 NetworkInterface::NetworkInterface(const std::string& name, 2171 NetworkInterface::NetworkInterface(const std::string& name,
2149 const IPAddressNumber& address) 2172 const IPAddressNumber& address)
2150 : name(name), address(address) { 2173 : name(name), address(address) {
2151 } 2174 }
2152 2175
2153 NetworkInterface::~NetworkInterface() { 2176 NetworkInterface::~NetworkInterface() {
2154 } 2177 }
2155 2178
2156 } // namespace net 2179 } // namespace net
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698