Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(287)

Unified Diff: net/base/net_util.cc

Issue 23642003: Support IDNA 2008 (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: Created 7 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: net/base/net_util.cc
===================================================================
--- net/base/net_util.cc (revision 223643)
+++ net/base/net_util.cc (working copy)
@@ -324,7 +324,7 @@
UErrorCode status = U_ZERO_ERROR;
#ifdef U_WCHAR_IS_UTF16
icu::UnicodeSet dangerous_characters(icu::UnicodeString(
- L"[[\\ \u00bc\u00bd\u01c3\u0337\u0338"
+ L"[[\\ \u00ad\u00bc\u00bd\u01c3\u0337\u0338"
L"\u05c3\u05f4\u06d4\u0702\u115f\u1160][\u2000-\u200b]"
L"[\u2024\u2027\u2028\u2029\u2039\u203a\u2044\u205f]"
L"[\u2154-\u2156][\u2159-\u215b][\u215f\u2215\u23ae"
@@ -341,7 +341,7 @@
0, status);
#else
icu::UnicodeSet dangerous_characters(icu::UnicodeString(
- "[[\\u0020\\u00bc\\u00bd\\u01c3\\u0337\\u0338"
+ "[[\\u0020\\u00ad\\u00bc\\u00bd\\u01c3\\u0337\\u0338"
"\\u05c3\\u05f4\\u06d4\\u0702\\u115f\\u1160][\\u2000-\\u200b]"
"[\\u2024\\u2027\\u2028\\u2029\\u2039\\u203a\\u2044\\u205f]"
"[\\u2154-\\u2156][\\u2159-\\u215b][\\u215f\\u2215\\u23ae"
@@ -398,6 +398,29 @@
return false;
}
+struct uidna_wrapper {
Peter Kasting 2013/09/19 20:55:36 Struct names should be CamelCase.
jungshik at Google 2013/09/20 21:33:41 Done.
+ uidna_wrapper() {
+ UErrorCode err = U_ZERO_ERROR;
+ // This is the option closest to what we had in the past with IDNA 2003
Peter Kasting 2013/09/19 20:55:36 Never write in a comment about what "used to" happ…
jungshik at Google 2013/09/20 21:33:41 Done. I just kept TODO comment here and explained…
+ // API and matches what IE 10 does except for BiDi check.
+ // IDNA 2003 always checks BiDi. We used to allow unassigned code
+ // points. However, with our Unicode DB pretty up to date, we'd not
+ // need to turn this on.
+ // We didn't use STD3 rules and we continue not to.
+ // TODO(jungshik) : Change options as different parties (browsers,
+ // registrars, search engines) converge toward a consensus.
+ int32_t options = UIDNA_CHECK_BIDI;
Peter Kasting 2013/09/19 20:55:36 Nit: Don't make a temp for this, just inline it.
jungshik at Google 2013/09/20 21:33:41 Done.
+ value = uidna_openUTS46(options, &err);
+ if (U_FAILURE(err))
+ value = NULL;
+ }
+
+ UIDNA* value;
+};
+
+static base::LazyInstance<uidna_wrapper>::Leaky
+ g_uidna = LAZY_INSTANCE_INITIALIZER;
+
// Converts one component of a host (between dots) to IDN if safe. The result
// will be APPENDED to the given output string and will be the same as the input
// if it is not IDN or the IDN is unsafe to display. Returns whether any
@@ -414,26 +437,26 @@
static const base::char16 kIdnPrefix[] = {'x', 'n', '-', '-'};
if ((comp_len > arraysize(kIdnPrefix)) &&
!memcmp(comp, kIdnPrefix, arraysize(kIdnPrefix) * sizeof(base::char16))) {
- // Repeatedly expand the output string until it's big enough. It looks like
- // ICU will return the required size of the buffer, but that's not
- // documented, so we'll just grow by 2x. This should be rare and is not on a
- // critical path.
+
+ UIDNA* uidna = g_uidna.Get().value;
+ DCHECK(uidna != NULL);
size_t original_length = out->length();
- for (int extra_space = 64; ; extra_space *= 2) {
+ for (int output_length = 64; ; ) {
Peter Kasting 2013/09/19 20:55:36 How about writing the loop this way: // Try t…
jungshik at Google 2013/09/20 21:33:41 Done with a slight change because UErrorCode has t…
UErrorCode status = U_ZERO_ERROR;
- out->resize(out->length() + extra_space);
- int output_chars = uidna_IDNToUnicode(comp,
- static_cast<int32_t>(comp_len), &(*out)[original_length], extra_space,
- UIDNA_DEFAULT, NULL, &status);
- if (status == U_ZERO_ERROR) {
+ UIDNAInfo info = UIDNA_INFO_INITIALIZER;
+ out->resize(original_length + output_length);
+ output_length = uidna_labelToUnicode(uidna, comp,
+ static_cast<int32_t>(comp_len), &(*out)[original_length],
+ output_length, &info, &status);
+ if (U_SUCCESS(status) && info.errors == 0) {
// Converted successfully.
- out->resize(original_length + output_chars);
- if (IsIDNComponentSafe(out->data() + original_length, output_chars,
+ out->resize(original_length + output_length);
+ if (IsIDNComponentSafe(out->data() + original_length, output_length,
languages))
return true;
}
- if (status != U_BUFFER_OVERFLOW_ERROR)
+ if (status != U_BUFFER_OVERFLOW_ERROR || info.errors != 0)
break;
}
// Failed, revert back to original string.

Powered by Google App Engine
This is Rietveld 408576698