Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(206)

Unified Diff: googleurl/src/url_canon_host.cc

Issue 160589: All host names with nonascii characters (cyrillic) + escapable characters (,)... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: '' Created 11 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « googleurl/src/gurl_test_main.cc ('k') | googleurl/src/url_canon_internal.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: googleurl/src/url_canon_host.cc
===================================================================
--- googleurl/src/url_canon_host.cc (revision 110)
+++ googleurl/src/url_canon_host.cc (working copy)
@@ -134,17 +134,21 @@
// |*has_non_ascii| flag.
//
// The return value indicates if the output is a potentially valid host name.
-template<typename CHAR>
-bool DoSimpleHost(const CHAR* host, int host_len, CanonOutput* output,
+template<typename INCHAR, typename OUTCHAR>
+bool DoSimpleHost(const INCHAR* host,
+ int host_len,
+ CanonOutputT<OUTCHAR>* output,
bool* has_non_ascii) {
*has_non_ascii = false;
bool success = true;
- for (int i = 0; i < host_len; i++) {
- unsigned char source = static_cast<unsigned char>(host[i]);
+ for (int i = 0; i < host_len; ++i) {
+ unsigned int source = host[i];
if (source == '%') {
- // Handle unescaping. This will replace |source| with the unescaped char.
- if (!DecodeEscaped(host, &i, host_len, &source)) {
+ // Unescape first, if possible.
+ // Source will be used only if decode operation was successful.
+ if (!DecodeEscaped(host, &i, host_len,
+ reinterpret_cast<unsigned char*>(&source))) {
// Invalid escaped character. There is nothing that can make this
// host valid. We append an escaped percent so the URL looks reasonable
// and mark as failed.
@@ -154,11 +158,7 @@
}
}
- if (source >= 0x80) {
- // Handle non-ASCII.
- *has_non_ascii = true;
- output->push_back(source);
- } else {
+ if (source <= 0x80) {
// We have ASCII input, we can use our lookup table.
unsigned char replacement = kHostCharLookup[source];
if (!replacement) {
@@ -174,15 +174,30 @@
// cased).
output->push_back(replacement);
}
+ } else {
+ // It's a non-ascii char. Just push it to the output.
+ // In case where we have char16 input, and char output it's safe to
+ // cast char16->char only if input string was converted to ASCII.
+ output->push_back(static_cast<OUTCHAR>(source));
+ *has_non_ascii = true;
}
}
+
return success;
}
-// Canonicalizes a host that requires IDN conversion. Returns true on success.
+// Canonicalizes a host that requires IDN conversion. Returns true on success
bool DoIDNHost(const char16* src, int src_len, CanonOutput* output) {
+ // We need to escape URL before doing IDN conversion, since punicode strings
+ // cannot be escaped after they are created.
+ RawCanonOutputW<kTempHostBufferLen> url_escaped_host;
+ bool has_non_ascii;
+ DoSimpleHost(src, src_len, &url_escaped_host, &has_non_ascii);
+
StackBufferW wide_output;
- if (!IDNToASCII(src, src_len, &wide_output)) {
+ if (!IDNToASCII(url_escaped_host.data(),
+ url_escaped_host.length(),
+ &wide_output)) {
// Some error, give up. This will write some reasonable looking
// representation of the string to the output.
AppendInvalidNarrowString(src, 0, src_len, output);
@@ -192,10 +207,9 @@
// Now we check the ASCII output like a normal host. It will also handle
// unescaping. Although we unescaped everything before this function call, if
// somebody does %00 as fullwidth, ICU will convert this to ASCII.
- bool has_non_ascii;
- bool success = DoSimpleHost<char16>(wide_output.data(),
- wide_output.length(),
- output, &has_non_ascii);
+ bool success = DoSimpleHost(wide_output.data(),
+ wide_output.length(),
+ output, &has_non_ascii);
DCHECK(!has_non_ascii);
return success;
}
« no previous file with comments | « googleurl/src/gurl_test_main.cc ('k') | googleurl/src/url_canon_internal.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698