net/base/net_util.cc - Issue 23642003: Support IDNA 2008

Side by Side Diff: net/base/net_util.cc

Issue 23642003: Support IDNA 2008 (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/

Patch Set: Created 7 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "net/base/net_util.h"	5 #include "net/base/net_util.h"

6	6

7 #include <algorithm>	7 #include <algorithm>

8 #include <iterator>	8 #include <iterator>

9 #include <map>	9 #include <map>

10	10

(...skipping 306 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
317 // (e.g. Characters like U+00A0, U+3000, U+3002 are omitted because	317 // (e.g. Characters like U+00A0, U+3000, U+3002 are omitted because

318 // they're gonna be canonicalized to U+0020 and full stop before	318 // they're gonna be canonicalized to U+0020 and full stop before

319 // reaching here.)	319 // reaching here.)

320 // The original list is available at	320 // The original list is available at

321 // http://kb.mozillazine.org/Network.IDN.blacklist_chars and	321 // http://kb.mozillazine.org/Network.IDN.blacklist_chars and

322 // at http://mxr.mozilla.org/seamonkey/source/modules/libpref/src/init/all.js#703	322 // at http://mxr.mozilla.org/seamonkey/source/modules/libpref/src/init/all.js#703

323	323

324 UErrorCode status = U_ZERO_ERROR;	324 UErrorCode status = U_ZERO_ERROR;

325 #ifdef U_WCHAR_IS_UTF16	325 #ifdef U_WCHAR_IS_UTF16

326 icu::UnicodeSet dangerous_characters(icu::UnicodeString(	326 icu::UnicodeSet dangerous_characters(icu::UnicodeString(

327 L"[[\\ \u00bc\u00bd\u01c3\u0337\u0338"	327 L"[[\\ \u00ad\u00bc\u00bd\u01c3\u0337\u0338"

328 L"\u05c3\u05f4\u06d4\u0702\u115f\u1160][\u2000-\u200b]"	328 L"\u05c3\u05f4\u06d4\u0702\u115f\u1160][\u2000-\u200b]"

329 L"[\u2024\u2027\u2028\u2029\u2039\u203a\u2044\u205f]"	329 L"[\u2024\u2027\u2028\u2029\u2039\u203a\u2044\u205f]"

330 L"[\u2154-\u2156][\u2159-\u215b][\u215f\u2215\u23ae"	330 L"[\u2154-\u2156][\u2159-\u215b][\u215f\u2215\u23ae"

331 L"\u29f6\u29f8\u2afb\u2afd][\u2ff0-\u2ffb][\u3014"	331 L"\u29f6\u29f8\u2afb\u2afd][\u2ff0-\u2ffb][\u3014"

332 L"\u3015\u3033\u3164\u321d\u321e\u33ae\u33af\u33c6\u33df\ufe14"	332 L"\u3015\u3033\u3164\u321d\u321e\u33ae\u33af\u33c6\u33df\ufe14"

333 L"\ufe15\ufe3f\ufe5d\ufe5e\ufeff\uff0e\uff06\uff61\uffa0\ufff9]"	333 L"\ufe15\ufe3f\ufe5d\ufe5e\ufeff\uff0e\uff06\uff61\uffa0\ufff9]"

334 L"[\ufffa-\ufffd]]"), status);	334 L"[\ufffa-\ufffd]]"), status);

335 DCHECK(U_SUCCESS(status));	335 DCHECK(U_SUCCESS(status));

336 icu::RegexMatcher dangerous_patterns(icu::UnicodeString(	336 icu::RegexMatcher dangerous_patterns(icu::UnicodeString(

337 // Lone katakana no, so, or n	337 // Lone katakana no, so, or n

338 L"[^\\p{Katakana}][\u30ce\u30f3\u30bd][^\\p{Katakana}]"	338 L"[^\\p{Katakana}][\u30ce\u30f3\u30bd][^\\p{Katakana}]"

339 // Repeating Japanese accent characters	339 // Repeating Japanese accent characters

340 L"\|[\u3099\u309a\u309b\u309c][\u3099\u309a\u309b\u309c]"),	340 L"\|[\u3099\u309a\u309b\u309c][\u3099\u309a\u309b\u309c]"),

341 0, status);	341 0, status);

342 #else	342 #else

343 icu::UnicodeSet dangerous_characters(icu::UnicodeString(	343 icu::UnicodeSet dangerous_characters(icu::UnicodeString(

344 "[[\\u0020\\u00bc\\u00bd\\u01c3\\u0337\\u0338"	344 "[[\\u0020\\u00ad\\u00bc\\u00bd\\u01c3\\u0337\\u0338"

345 "\\u05c3\\u05f4\\u06d4\\u0702\\u115f\\u1160][\\u2000-\\u200b]"	345 "\\u05c3\\u05f4\\u06d4\\u0702\\u115f\\u1160][\\u2000-\\u200b]"

346 "[\\u2024\\u2027\\u2028\\u2029\\u2039\\u203a\\u2044\\u205f]"	346 "[\\u2024\\u2027\\u2028\\u2029\\u2039\\u203a\\u2044\\u205f]"

347 "[\\u2154-\\u2156][\\u2159-\\u215b][\\u215f\\u2215\\u23ae"	347 "[\\u2154-\\u2156][\\u2159-\\u215b][\\u215f\\u2215\\u23ae"

348 "\\u29f6\\u29f8\\u2afb\\u2afd][\\u2ff0-\\u2ffb][\\u3014"	348 "\\u29f6\\u29f8\\u2afb\\u2afd][\\u2ff0-\\u2ffb][\\u3014"

349 "\\u3015\\u3033\\u3164\\u321d\\u321e\\u33ae\\u33af\\u33c6\\u33df\\ufe14"	349 "\\u3015\\u3033\\u3164\\u321d\\u321e\\u33ae\\u33af\\u33c6\\u33df\\ufe14"

350 "\\ufe15\\ufe3f\\ufe5d\\ufe5e\\ufeff\\uff0e\\uff06\\uff61\\uffa0\\ufff9]"	350 "\\ufe15\\ufe3f\\ufe5d\\ufe5e\\ufeff\\uff0e\\uff06\\uff61\\uffa0\\ufff9]"

351 "[\\ufffa-\\ufffd]]", -1, US_INV), status);	351 "[\\ufffa-\\ufffd]]", -1, US_INV), status);

352 DCHECK(U_SUCCESS(status));	352 DCHECK(U_SUCCESS(status));

353 icu::RegexMatcher dangerous_patterns(icu::UnicodeString(	353 icu::RegexMatcher dangerous_patterns(icu::UnicodeString(

354 // Lone katakana no, so, or n	354 // Lone katakana no, so, or n

(...skipping 36 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
391 component_characters.removeAll(common_characters);	391 component_characters.removeAll(common_characters);

392	392

393 base::StringTokenizer t(languages, ",");	393 base::StringTokenizer t(languages, ",");

394 while (t.GetNext()) {	394 while (t.GetNext()) {

395 if (IsComponentCoveredByLang(component_characters, t.token()))	395 if (IsComponentCoveredByLang(component_characters, t.token()))

396 return true;	396 return true;

397 }	397 }

398 return false;	398 return false;

399 }	399 }

400	400

	401 // A wrapper to use LazyInstance<>::Leaky with ICU's UIDNA, a C pointer to

	402 // a UTS46/IDNA 2008 handling object opened with uidna_openUTS46().

	403 //

	404 // We use UTS46 with BiDiCheck to migrate from IDNA 2003 to IDNA 2008 with

	405 // the backward compatibility in mind. What it does:

	406 //

	407 // 1. Use the up-to-date Unicode data.

	408 // 2. Define a case folding/mapping with the up-to-date Unicode data as

	409 // in IDNA 2003.

	410 // 3. Use transitional mechanism for 4 deviation characters (sharp-s,

	411 // final sigma, ZWJ and ZWNJ) for now.

	412 // 4. Continue to allow symbols and punctuations.

	413 // 5. Apply new BiDi check rules more permissive than the IDNA 2003 BiDi rules.

	414 // 6. Do not apply STD3 rules

	415 // 7. Do not allow unassigned code points.

	416 //

	417 // It also closely matches what IE 10 does except for the BiDi check (

	418 // http://goo.gl/3XBhqw ).

	419 // See http://unicode.org/reports/tr46/ and references therein

	420 // for more details.

	421 struct UIDNAWrapper {

	422 UIDNAWrapper() {

	423 UErrorCode err = U_ZERO_ERROR;

	424 // TODO(jungshik): Change options as different parties (browsers,

	425 // registrars, search engines) converge toward a consensus.

	426 value = uidna_openUTS46(UIDNA_CHECK_BIDI, &err);

	427 if (U_FAILURE(err))

	428 value = NULL;

	429 }

	430

	431 UIDNA* value;

	432 };

	433

	434 static base::LazyInstance<UIDNAWrapper>::Leaky

	435 g_uidna = LAZY_INSTANCE_INITIALIZER;

	436

401 // Converts one component of a host (between dots) to IDN if safe. The result	437 // Converts one component of a host (between dots) to IDN if safe. The result

402 // will be APPENDED to the given output string and will be the same as the input	438 // will be APPENDED to the given output string and will be the same as the input

403 // if it is not IDN or the IDN is unsafe to display. Returns whether any	439 // if it is not IDN or the IDN is unsafe to display. Returns whether any

404 // conversion was performed.	440 // conversion was performed.

405 bool IDNToUnicodeOneComponent(const base::char16* comp,	441 bool IDNToUnicodeOneComponent(const base::char16* comp,

406 size_t comp_len,	442 size_t comp_len,

407 const std::string& languages,	443 const std::string& languages,

408 base::string16* out) {	444 base::string16* out) {

409 DCHECK(out);	445 DCHECK(out);

410 if (comp_len == 0)	446 if (comp_len == 0)

411 return false;	447 return false;

412	448

413 // Only transform if the input can be an IDN component.	449 // Only transform if the input can be an IDN component.

414 static const base::char16 kIdnPrefix[] = {'x', 'n', '-', '-'};	450 static const base::char16 kIdnPrefix[] = {'x', 'n', '-', '-'};

415 if ((comp_len > arraysize(kIdnPrefix)) &&	451 if ((comp_len > arraysize(kIdnPrefix)) &&

416 !memcmp(comp, kIdnPrefix, arraysize(kIdnPrefix) * sizeof(base::char16))) {	452 !memcmp(comp, kIdnPrefix, arraysize(kIdnPrefix) * sizeof(base::char16))) {

417 // Repeatedly expand the output string until it's big enough. It looks like	453 UIDNA* uidna = g_uidna.Get().value;

418 // ICU will return the required size of the buffer, but that's not	454 DCHECK(uidna != NULL);

419 // documented, so we'll just grow by 2x. This should be rare and is not on a

420 // critical path.

421 size_t original_length = out->length();	455 size_t original_length = out->length();

422 for (int extra_space = 64; ; extra_space *= 2) {	456 int output_length = 64;

423 UErrorCode status = U_ZERO_ERROR;	457 UIDNAInfo info = UIDNA_INFO_INITIALIZER;

424 out->resize(out->length() + extra_space);	458 UErrorCode status;

425 int output_chars = uidna_IDNToUnicode(comp,	459 do {

426 static_cast<int32_t>(comp_len), &(*out)[original_length], extra_space,	460 out->resize(original_length + output_length);

427 UIDNA_DEFAULT, NULL, &status);	461 status = U_ZERO_ERROR;

428 if (status == U_ZERO_ERROR) {	462 // This returns the actual length required. If this is more than 64

429 // Converted successfully.	463 // code units, \|status\| will be U_BUFFER_OVERFLOW_ERROR and we'll try

430 out->resize(original_length + output_chars);	464 // the conversion again, but with a sufficiently large buffer.

431 if (IsIDNComponentSafe(out->data() + original_length, output_chars,	465 output_length = uidna_labelToUnicode(

432 languages))	466 uidna, comp, static_cast<int32_t>(comp_len), &(*out)[original_length],

433 return true;	467 output_length, &info, &status);

434 }	468 } while ((status == U_BUFFER_OVERFLOW_ERROR && info.errors == 0));

435	469

436 if (status != U_BUFFER_OVERFLOW_ERROR)	470 if (U_SUCCESS(status) && info.errors == 0) {

437 break;	471 // Converted successfully. Ensure that the converted component

	472 // can be safely displayed to the user.

	473 out->resize(original_length + output_length);

	474 if (IsIDNComponentSafe(out->data() + original_length, output_length,

	475 languages))

	476 return true;

438 }	477 }

439 // Failed, revert back to original string.	478

	479 // Something went wrong. Revert to original string.

440 out->resize(original_length);	480 out->resize(original_length);

441 }	481 }

442	482

443 // We get here with no IDN or on error, in which case we just append the	483 // We get here with no IDN or on error, in which case we just append the

444 // literal input.	484 // literal input.

445 out->append(comp, comp_len);	485 out->append(comp, comp_len);

446 return false;	486 return false;

447 }	487 }

448	488

449 // Clamps the offsets in \|offsets_for_adjustment\| to the length of \|str\|.	489 // Clamps the offsets in \|offsets_for_adjustment\| to the length of \|str\|.

(...skipping 1697 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2147	2187

2148 NetworkInterface::NetworkInterface(const std::string& name,	2188 NetworkInterface::NetworkInterface(const std::string& name,

2149 const IPAddressNumber& address)	2189 const IPAddressNumber& address)

2150 : name(name), address(address) {	2190 : name(name), address(address) {

2151 }	2191 }

2152	2192

2153 NetworkInterface::~NetworkInterface() {	2193 NetworkInterface::~NetworkInterface() {

2154 }	2194 }

2155	2195

2156 } // namespace net	2196 } // namespace net

OLD	NEW

« no previous file with comments | « chrome/common/net/x509_certificate_model.cc ('k') | url/url.gyp » ('j') | no next file with comments »