| Index: src/url_canon_host.cc
|
| ===================================================================
|
| --- src/url_canon_host.cc (revision 106)
|
| +++ src/url_canon_host.cc (working copy)
|
| @@ -82,7 +82,7 @@
|
| // ' ' ! " # $ % & ' ( ) * + , - . /
|
| kEsc,kEsc,kEsc,kEsc,kEsc, 0, kEsc,kEsc,kEsc,kEsc,kEsc, '+',kEsc, '-', '.', 0,
|
| // 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
|
| - '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 0 , 0 ,kEsc,kEsc,kEsc, 0 ,
|
| + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', 0 ,kEsc,kEsc,kEsc, 0 ,
|
| // @ A B C D E F G H I J K L M N O
|
| kEsc, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
|
| // P Q R S T U V W X Y Z [ \ ] ^ _
|
| @@ -113,27 +113,6 @@
|
| }
|
| }
|
|
|
| -// Considers the current contents of the output and sees if it looks like an
|
| -// IP address. This is called because we canonicalize to the output assuming
|
| -// that it's not an IP address, and now need to fix it if we produced one.
|
| -//
|
| -// The generated hostname is identified by |host|. The output will be fixed
|
| -// with a canonical IP address if the host looks like one. Otherwise, there
|
| -// will be no change.
|
| -void InterpretIPAddress(const url_parse::Component& host,
|
| - CanonOutput* output) {
|
| - // Canonicalize the IP address in the output to this temporary buffer.
|
| - // IP addresses are small, so this should not cause an allocation.
|
| - RawCanonOutput<64> canon_ip;
|
| - url_parse::Component out_host; // Unused.
|
| - if (CanonicalizeIPAddress(output->data(), host, &canon_ip, &out_host)) {
|
| - // Looks like an IP address, overwrite the existing host with the newly
|
| - // canonicalized IP address.
|
| - output->set_length(host.begin);
|
| - output->Append(canon_ip.data(), canon_ip.length());
|
| - }
|
| -}
|
| -
|
| // Canonicalizes a host name that is entirely 8-bit characters (even though
|
| // the type holding them may be 16 bits. Escaped characters will be unescaped.
|
| // Non-7-bit characters (for example, UTF-8) will be passed unchanged.
|
| @@ -160,12 +139,6 @@
|
| bool* has_non_ascii) {
|
| *has_non_ascii = false;
|
|
|
| - // First check if the host name is an IP address.
|
| - url_parse::Component out_ip; // Unused: we compute the size ourselves later.
|
| - if (CanonicalizeIPAddress(host, url_parse::Component(0, host_len),
|
| - output, &out_ip))
|
| - return true;
|
| -
|
| bool success = true;
|
| for (int i = 0; i < host_len; i++) {
|
| unsigned char source = static_cast<unsigned char>(host[i]);
|
| @@ -255,10 +228,6 @@
|
| // Unescaping may have left us with ASCII input, in which case the
|
| // unescaped version we wrote to output is complete.
|
| if (!has_non_ascii) {
|
| - // Need to be sure to check for IP addresses in the newly unescaped
|
| - // output. This will fix the output if necessary.
|
| - InterpretIPAddress(url_parse::MakeRange(begin_length, output->length()),
|
| - output);
|
| return true;
|
| }
|
|
|
| @@ -328,36 +297,55 @@
|
| }
|
|
|
| template<typename CHAR, typename UCHAR>
|
| -bool DoHost(const CHAR* spec,
|
| +void DoHost(const CHAR* spec,
|
| const url_parse::Component& host,
|
| CanonOutput* output,
|
| - url_parse::Component* out_host) {
|
| - bool success = true;
|
| + CanonHostInfo* host_info) {
|
| if (host.len <= 0) {
|
| // Empty hosts don't need anything.
|
| - *out_host = url_parse::Component();
|
| - return true;
|
| + host_info->family = CanonHostInfo::NEUTRAL;
|
| + host_info->out_host = url_parse::Component();
|
| + return;
|
| }
|
|
|
| bool has_non_ascii, has_escaped;
|
| ScanHostname<CHAR, UCHAR>(spec, host, &has_non_ascii, &has_escaped);
|
|
|
| - out_host->begin = output->length();
|
| + // Keep track of output's initial length, so we can rewind later.
|
| + const int output_begin = output->length();
|
|
|
| + bool success;
|
| if (!has_non_ascii && !has_escaped) {
|
| - success &= DoSimpleHost(&spec[host.begin], host.len,
|
| - output, &has_non_ascii);
|
| + success = DoSimpleHost(&spec[host.begin], host.len,
|
| + output, &has_non_ascii);
|
| DCHECK(!has_non_ascii);
|
| } else {
|
| - success &= DoComplexHost(&spec[host.begin], host.len,
|
| - has_non_ascii, has_escaped, output);
|
| - // We could have had escaped numerals that should now be canonicalized as
|
| - // an IP address. This should be exceedingly rare, it's probably mostly
|
| - // used by scammers.
|
| + success = DoComplexHost(&spec[host.begin], host.len,
|
| + has_non_ascii, has_escaped, output);
|
| }
|
|
|
| - out_host->len = output->length() - out_host->begin;
|
| - return success;
|
| + if (!success) {
|
| + // Canonicalization failed. Set BROKEN to notify the caller.
|
| + host_info->family = CanonHostInfo::BROKEN;
|
| + } else {
|
| + // After all the other canonicalization, check if we ended up with an IP
|
| + // address. IP addresses are small, so writing into this temporary buffer
|
| + // should not cause an allocation.
|
| + RawCanonOutput<64> canon_ip;
|
| + CanonicalizeIPAddress(output->data(),
|
| + url_parse::MakeRange(output_begin, output->length()),
|
| + &canon_ip, host_info);
|
| +
|
| + // If we got an IPv4/IPv6 address, copy the canonical form back to the
|
| + // real buffer. Otherwise, it's a hostname or broken IP, in which case
|
| + // we just leave it in place.
|
| + if (host_info->IsIPAddress()) {
|
| + output->set_length(output_begin);
|
| + output->Append(canon_ip.data(), canon_ip.length());
|
| + }
|
| + }
|
| +
|
| + host_info->out_host = url_parse::MakeRange(output_begin, output->length());
|
| }
|
|
|
| } // namespace
|
| @@ -366,14 +354,34 @@
|
| const url_parse::Component& host,
|
| CanonOutput* output,
|
| url_parse::Component* out_host) {
|
| - return DoHost<char, unsigned char>(spec, host, output, out_host);
|
| + CanonHostInfo host_info;
|
| + DoHost<char, unsigned char>(spec, host, output, &host_info);
|
| + *out_host = host_info.out_host;
|
| + return (host_info.family != CanonHostInfo::BROKEN);
|
| }
|
|
|
| bool CanonicalizeHost(const char16* spec,
|
| const url_parse::Component& host,
|
| CanonOutput* output,
|
| url_parse::Component* out_host) {
|
| - return DoHost<char16, char16>(spec, host, output, out_host);
|
| + CanonHostInfo host_info;
|
| + DoHost<char16, char16>(spec, host, output, &host_info);
|
| + *out_host = host_info.out_host;
|
| + return (host_info.family != CanonHostInfo::BROKEN);
|
| }
|
|
|
| +void CanonicalizeHostVerbose(const char* spec,
|
| + const url_parse::Component& host,
|
| + CanonOutput* output,
|
| + CanonHostInfo *host_info) {
|
| + DoHost<char, unsigned char>(spec, host, output, host_info);
|
| +}
|
| +
|
| +void CanonicalizeHostVerbose(const char16* spec,
|
| + const url_parse::Component& host,
|
| + CanonOutput* output,
|
| + CanonHostInfo *host_info) {
|
| + DoHost<char16, char16>(spec, host, output, host_info);
|
| +}
|
| +
|
| } // namespace url_canon
|
|
|