Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(318)

Unified Diff: src/url_canon_host.cc

Issue 114050: url_canon: New CanonicalizeHostVerbose() function. (Closed) Base URL: http://google-url.googlecode.com/svn/trunk/
Patch Set: Address brettw's comments Created 11 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/url_canon.h ('k') | src/url_canon_ip.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/url_canon_host.cc
===================================================================
--- src/url_canon_host.cc (revision 106)
+++ src/url_canon_host.cc (working copy)
@@ -82,7 +82,7 @@
// ' ' ! " # $ % & ' ( ) * + , - . /
kEsc,kEsc,kEsc,kEsc,kEsc, 0, kEsc,kEsc,kEsc,kEsc,kEsc, '+',kEsc, '-', '.', 0,
// 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
- '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 0 , 0 ,kEsc,kEsc,kEsc, 0 ,
+ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', 0 ,kEsc,kEsc,kEsc, 0 ,
// @ A B C D E F G H I J K L M N O
kEsc, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
// P Q R S T U V W X Y Z [ \ ] ^ _
@@ -113,27 +113,6 @@
}
}
-// Considers the current contents of the output and sees if it looks like an
-// IP address. This is called because we canonicalize to the output assuming
-// that it's not an IP address, and now need to fix it if we produced one.
-//
-// The generated hostname is identified by |host|. The output will be fixed
-// with a canonical IP address if the host looks like one. Otherwise, there
-// will be no change.
-void InterpretIPAddress(const url_parse::Component& host,
- CanonOutput* output) {
- // Canonicalize the IP address in the output to this temporary buffer.
- // IP addresses are small, so this should not cause an allocation.
- RawCanonOutput<64> canon_ip;
- url_parse::Component out_host; // Unused.
- if (CanonicalizeIPAddress(output->data(), host, &canon_ip, &out_host)) {
- // Looks like an IP address, overwrite the existing host with the newly
- // canonicalized IP address.
- output->set_length(host.begin);
- output->Append(canon_ip.data(), canon_ip.length());
- }
-}
-
// Canonicalizes a host name that is entirely 8-bit characters (even though
// the type holding them may be 16 bits. Escaped characters will be unescaped.
// Non-7-bit characters (for example, UTF-8) will be passed unchanged.
@@ -160,12 +139,6 @@
bool* has_non_ascii) {
*has_non_ascii = false;
- // First check if the host name is an IP address.
- url_parse::Component out_ip; // Unused: we compute the size ourselves later.
- if (CanonicalizeIPAddress(host, url_parse::Component(0, host_len),
- output, &out_ip))
- return true;
-
bool success = true;
for (int i = 0; i < host_len; i++) {
unsigned char source = static_cast<unsigned char>(host[i]);
@@ -255,10 +228,6 @@
// Unescaping may have left us with ASCII input, in which case the
// unescaped version we wrote to output is complete.
if (!has_non_ascii) {
- // Need to be sure to check for IP addresses in the newly unescaped
- // output. This will fix the output if necessary.
- InterpretIPAddress(url_parse::MakeRange(begin_length, output->length()),
- output);
return true;
}
@@ -328,36 +297,55 @@
}
template<typename CHAR, typename UCHAR>
-bool DoHost(const CHAR* spec,
+void DoHost(const CHAR* spec,
const url_parse::Component& host,
CanonOutput* output,
- url_parse::Component* out_host) {
- bool success = true;
+ CanonHostInfo* host_info) {
if (host.len <= 0) {
// Empty hosts don't need anything.
- *out_host = url_parse::Component();
- return true;
+ host_info->family = CanonHostInfo::NEUTRAL;
+ host_info->out_host = url_parse::Component();
+ return;
}
bool has_non_ascii, has_escaped;
ScanHostname<CHAR, UCHAR>(spec, host, &has_non_ascii, &has_escaped);
- out_host->begin = output->length();
+ // Keep track of output's initial length, so we can rewind later.
+ const int output_begin = output->length();
+ bool success;
if (!has_non_ascii && !has_escaped) {
- success &= DoSimpleHost(&spec[host.begin], host.len,
- output, &has_non_ascii);
+ success = DoSimpleHost(&spec[host.begin], host.len,
+ output, &has_non_ascii);
DCHECK(!has_non_ascii);
} else {
- success &= DoComplexHost(&spec[host.begin], host.len,
- has_non_ascii, has_escaped, output);
- // We could have had escaped numerals that should now be canonicalized as
- // an IP address. This should be exceedingly rare, it's probably mostly
- // used by scammers.
+ success = DoComplexHost(&spec[host.begin], host.len,
+ has_non_ascii, has_escaped, output);
}
- out_host->len = output->length() - out_host->begin;
- return success;
+ if (!success) {
+ // Canonicalization failed. Set BROKEN to notify the caller.
+ host_info->family = CanonHostInfo::BROKEN;
+ } else {
+ // After all the other canonicalization, check if we ended up with an IP
+ // address. IP addresses are small, so writing into this temporary buffer
+ // should not cause an allocation.
+ RawCanonOutput<64> canon_ip;
+ CanonicalizeIPAddress(output->data(),
+ url_parse::MakeRange(output_begin, output->length()),
+ &canon_ip, host_info);
+
+ // If we got an IPv4/IPv6 address, copy the canonical form back to the
+ // real buffer. Otherwise, it's a hostname or broken IP, in which case
+ // we just leave it in place.
+ if (host_info->IsIPAddress()) {
+ output->set_length(output_begin);
+ output->Append(canon_ip.data(), canon_ip.length());
+ }
+ }
+
+ host_info->out_host = url_parse::MakeRange(output_begin, output->length());
}
} // namespace
@@ -366,14 +354,34 @@
const url_parse::Component& host,
CanonOutput* output,
url_parse::Component* out_host) {
- return DoHost<char, unsigned char>(spec, host, output, out_host);
+ CanonHostInfo host_info;
+ DoHost<char, unsigned char>(spec, host, output, &host_info);
+ *out_host = host_info.out_host;
+ return (host_info.family != CanonHostInfo::BROKEN);
}
bool CanonicalizeHost(const char16* spec,
const url_parse::Component& host,
CanonOutput* output,
url_parse::Component* out_host) {
- return DoHost<char16, char16>(spec, host, output, out_host);
+ CanonHostInfo host_info;
+ DoHost<char16, char16>(spec, host, output, &host_info);
+ *out_host = host_info.out_host;
+ return (host_info.family != CanonHostInfo::BROKEN);
}
+void CanonicalizeHostVerbose(const char* spec,
+ const url_parse::Component& host,
+ CanonOutput* output,
+ CanonHostInfo *host_info) {
+ DoHost<char, unsigned char>(spec, host, output, host_info);
+}
+
+void CanonicalizeHostVerbose(const char16* spec,
+ const url_parse::Component& host,
+ CanonOutput* output,
+ CanonHostInfo *host_info) {
+ DoHost<char16, char16>(spec, host, output, host_info);
+}
+
} // namespace url_canon
« no previous file with comments | « src/url_canon.h ('k') | src/url_canon_ip.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698