net/base/net_util.cc - Issue 23642003: Support IDNA 2008

Unified Diff: net/base/net_util.cc

Issue 23642003: Support IDNA 2008 (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/

Patch Set: Created 7 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: net/base/net_util.cc

===================================================================

--- net/base/net_util.cc (revision 225023)

+++ net/base/net_util.cc (working copy)

@@ -324,7 +324,7 @@

UErrorCode status = U_ZERO_ERROR;

#ifdef U_WCHAR_IS_UTF16

icu::UnicodeSet dangerous_characters(icu::UnicodeString(

- L"[[\\ \u00bc\u00bd\u01c3\u0337\u0338"

+ L"[[\\ \u00ad\u00bc\u00bd\u01c3\u0337\u0338"

L"\u05c3\u05f4\u06d4\u0702\u115f\u1160][\u2000-\u200b]"

L"[\u2024\u2027\u2028\u2029\u2039\u203a\u2044\u205f]"

L"[\u2154-\u2156][\u2159-\u215b][\u215f\u2215\u23ae"

@@ -341,7 +341,7 @@

0, status);

#else

icu::UnicodeSet dangerous_characters(icu::UnicodeString(

- "[[\\u0020\\u00bc\\u00bd\\u01c3\\u0337\\u0338"

+ "[[\\u0020\\u00ad\\u00bc\\u00bd\\u01c3\\u0337\\u0338"

"\\u05c3\\u05f4\\u06d4\\u0702\\u115f\\u1160][\\u2000-\\u200b]"

"[\\u2024\\u2027\\u2028\\u2029\\u2039\\u203a\\u2044\\u205f]"

"[\\u2154-\\u2156][\\u2159-\\u215b][\\u215f\\u2215\\u23ae"

@@ -398,6 +398,42 @@

return false;

}

+// A wrapper to use LazyInstance<>::Leaky with ICU's UIDNA, a C pointer to

+// a UTS46/IDNA 2008 handling object opened with uidna_openUTS46().

+//

+// We use UTS46 with BiDiCheck to migrate from IDNA 2003 to IDNA 2008 with

+// the backward compatibility in mind. What it does:

+//

+// 1. Use the up-to-date Unicode data.

+// 2. Define a case folding/mapping with the up-to-date Unicode data as

+// in IDNA 2003.

+// 3. Use transitional mechanism for 4 deviation characters (sharp-s,

+// final sigma, ZWJ and ZWNJ) for now.

+// 4. Continue to allow symbols and punctuations.

+// 5. Apply new BiDi check rules more permissive than the IDNA 2003 BiDI rules.

+// 6. Do not apply STD3 rules

+// 7. Do not allow unassigned code points.

+//

+// It also closely matches what IE 10 does except for the BiDi check (

+// http://goo.gl/3XBhqw ).

+// See http://http://unicode.org/reports/tr46/ and references therein

+// for more details.

+struct UIDNAWrapper {

+ UIDNAWrapper() {

+ UErrorCode err = U_ZERO_ERROR;

+ // TODO(jungshik): Change options as different parties (browsers,

+ // registrars, search engines) converge toward a consensus.

+ value = uidna_openUTS46(UIDNA_CHECK_BIDI, &err);

+ if (U_FAILURE(err))

+ value = NULL;

+ }

+ UIDNA* value;

+};

+static base::LazyInstance<UIDNAWrapper>::Leaky

+ g_uidna = LAZY_INSTANCE_INITIALIZER;

// Converts one component of a host (between dots) to IDN if safe. The result

// will be APPENDED to the given output string and will be the same as the input

// if it is not IDN or the IDN is unsafe to display. Returns whether any

@@ -414,29 +450,33 @@

static const base::char16 kIdnPrefix[] = {'x', 'n', '-', '-'};

if ((comp_len > arraysize(kIdnPrefix)) &&

!memcmp(comp, kIdnPrefix, arraysize(kIdnPrefix) * sizeof(base::char16))) {

- // Repeatedly expand the output string until it's big enough. It looks like

- // ICU will return the required size of the buffer, but that's not

- // documented, so we'll just grow by 2x. This should be rare and is not on a

- // critical path.

+ UIDNA* uidna = g_uidna.Get().value;

+ DCHECK(uidna != NULL);

size_t original_length = out->length();

- for (int extra_space = 64; ; extra_space *= 2) {

- UErrorCode status = U_ZERO_ERROR;

- out->resize(out->length() + extra_space);

- int output_chars = uidna_IDNToUnicode(comp,

- static_cast<int32_t>(comp_len), &(*out)[original_length], extra_space,

- UIDNA_DEFAULT, NULL, &status);

- if (status == U_ZERO_ERROR) {

- // Converted successfully.

- out->resize(original_length + output_chars);

- if (IsIDNComponentSafe(out->data() + original_length, output_chars,

- languages))

- return true;

- }

+ int output_length = 64;

+ UIDNAInfo info = UIDNA_INFO_INITIALIZER;

+ UErrorCode status;

+ do {

+ out->resize(original_length + output_length);

+ status = U_ZERO_ERROR;

+ // This returns the actual length required. If this is more than 64

+ // code units, |status| will be U_BUFFER_OVERFLOW_ERROR and we'll try

+ // the conversion again, but with a sufficiently large buffer.

+ output_length = uidna_labelToUnicode(

+ uidna, comp, static_cast<int32_t>(comp_len), &(*out)[original_length],

+ output_length, &info, &status);

Ryan Sleevi 2013/09/24 23:21:24 Why not use WriteInto here, rather than the resize

Peter Kasting 2013/09/24 23:28:08 It's not so much "this versus WriteInto()" as "thi

Ryan Sleevi 2013/09/24 23:53:56 I guess the concern was the usual "space for NULL"

+ } while ((status == U_BUFFER_OVERFLOW_ERROR && info.errors == 0));

- if (status != U_BUFFER_OVERFLOW_ERROR)

- break;

+ if (U_SUCCESS(status) && info.errors == 0) {

+ // Converted successfully. Ensure that the converted component

+ // can be safely displayed to the user.

+ out->resize(original_length + output_length);

+ if (IsIDNComponentSafe(out->data() + original_length, output_length,

+ languages))

+ return true;

}

- // Failed, revert back to original string.

+ // Something went wrong. Revert to original string.

out->resize(original_length);

}

« no previous file with comments | « chrome/common/net/x509_certificate_model.cc ('k') | url/url_canon_icu.cc » ('j') | no next file with comments »