| Index: net/base/net_util.cc
|
| ===================================================================
|
| --- net/base/net_util.cc (revision 215936)
|
| +++ net/base/net_util.cc (working copy)
|
| @@ -322,7 +322,7 @@
|
| UErrorCode status = U_ZERO_ERROR;
|
| #ifdef U_WCHAR_IS_UTF16
|
| icu::UnicodeSet dangerous_characters(icu::UnicodeString(
|
| - L"[[\\ \u00bc\u00bd\u01c3\u0337\u0338"
|
| + L"[[\\ \u00ad\u00bc\u00bd\u01c3\u0337\u0338"
|
| L"\u05c3\u05f4\u06d4\u0702\u115f\u1160][\u2000-\u200b]"
|
| L"[\u2024\u2027\u2028\u2029\u2039\u203a\u2044\u205f]"
|
| L"[\u2154-\u2156][\u2159-\u215b][\u215f\u2215\u23ae"
|
| @@ -339,7 +339,7 @@
|
| 0, status);
|
| #else
|
| icu::UnicodeSet dangerous_characters(icu::UnicodeString(
|
| - "[[\\u0020\\u00bc\\u00bd\\u01c3\\u0337\\u0338"
|
| + "[[\\u0020\\u00ad\\u00bc\\u00bd\\u01c3\\u0337\\u0338"
|
| "\\u05c3\\u05f4\\u06d4\\u0702\\u115f\\u1160][\\u2000-\\u200b]"
|
| "[\\u2024\\u2027\\u2028\\u2029\\u2039\\u203a\\u2044\\u205f]"
|
| "[\\u2154-\\u2156][\\u2159-\\u215b][\\u215f\\u2215\\u23ae"
|
| @@ -396,6 +396,9 @@
|
| return false;
|
| }
|
|
|
| +static base::LazyInstance<base::Lock>::Leaky
|
| +    g_uidna_lock = LAZY_INSTANCE_INITIALIZER;  // Guards lazy UIDNA creation.
|
| +
|
| // Converts one component of a host (between dots) to IDN if safe. The result
|
| // will be APPENDED to the given output string and will be the same as the input
|
| // if it is not IDN or the IDN is unsafe to display. Returns whether any
|
| @@ -408,30 +411,48 @@
|
| if (comp_len == 0)
|
| return false;
|
|
|
| +
|
| // Only transform if the input can be an IDN component.
|
| static const base::char16 kIdnPrefix[] = {'x', 'n', '-', '-'};
|
| if ((comp_len > arraysize(kIdnPrefix)) &&
|
| !memcmp(comp, kIdnPrefix, arraysize(kIdnPrefix) * sizeof(base::char16))) {
|
| - // Repeatedly expand the output string until it's big enough. It looks like
|
| - // ICU will return the required size of the buffer, but that's not
|
| - // documented, so we'll just grow by 2x. This should be rare and is not on a
|
| - // critical path.
|
| + static UIDNA* uidna = NULL; // will be leaked.
|
| + {
|
| + UErrorCode err = U_ZERO_ERROR;
|
| + base::AutoLock lock(g_uidna_lock.Get());
|
| + if (uidna == NULL) {
|
| + // This is the option closest to what we had in the past with IDNA 2003
|
| + // API and matches what IE 10 does except for BiDi check.
|
| +      // IDNA 2003 always checks BiDi. We used to allow unassigned code
|
| +      // points, but now that our Unicode database is reasonably up to
|
| +      // date, we no longer need to allow them.
|
| + // We didn't use STD3 rules and we continue not to.
|
| +      // TODO(jungshik): Review and change options as different
|
| + // parties (browsers, registrars, search engines) converge toward
|
| + // a consensus.
|
| + int32_t options = UIDNA_CHECK_BIDI;
|
| + uidna = uidna_openUTS46(options, &err);
|
| + if (U_FAILURE(err))
|
| + return false;
|
| + }
|
| + }
|
| size_t original_length = out->length();
|
| - for (int extra_space = 64; ; extra_space *= 2) {
|
| + for (int output_length = 64; ; ) {
|
| UErrorCode status = U_ZERO_ERROR;
|
| - out->resize(out->length() + extra_space);
|
| - int output_chars = uidna_IDNToUnicode(comp,
|
| - static_cast<int32_t>(comp_len), &(*out)[original_length], extra_space,
|
| - UIDNA_DEFAULT, NULL, &status);
|
| - if (status == U_ZERO_ERROR) {
|
| + UIDNAInfo info = UIDNA_INFO_INITIALIZER;
|
| + out->resize(original_length + output_length);
|
| + output_length = uidna_labelToUnicode(uidna, comp,
|
| + static_cast<int32_t>(comp_len), &(*out)[original_length],
|
| + output_length, &info, &status);
|
| + if (U_SUCCESS(status) && info.errors == 0) {
|
| // Converted successfully.
|
| - out->resize(original_length + output_chars);
|
| - if (IsIDNComponentSafe(out->data() + original_length, output_chars,
|
| + out->resize(original_length + output_length);
|
| + if (IsIDNComponentSafe(out->data() + original_length, output_length,
|
| languages))
|
| return true;
|
| }
|
|
|
| - if (status != U_BUFFER_OVERFLOW_ERROR)
|
| + if (status != U_BUFFER_OVERFLOW_ERROR || info.errors != 0)
|
| break;
|
| }
|
| // Failed, revert back to original string.
|
|
|