Index: chrome/browser/autocomplete/autocomplete.cc |
diff --git a/chrome/browser/autocomplete/autocomplete.cc b/chrome/browser/autocomplete/autocomplete.cc |
index 0693ca298eb1105860f860ce88844d6df673e609..198be975276eea00c8ba705fdd54a78a3d44a364 100644 |
--- a/chrome/browser/autocomplete/autocomplete.cc |
+++ b/chrome/browser/autocomplete/autocomplete.cc |
@@ -123,7 +123,9 @@ AutocompleteInput::Type AutocompleteInput::Parse( |
*scheme = parsed_scheme; |
if (parsed_scheme == L"file") { |
- // A user might or might not type a scheme when entering a file URL. |
+ // A user might or might not type a scheme when entering a file URL. In |
Peter Kasting
2010/08/04 17:54:13
Nit: Thanks for the comment update, it's a lot cle
|
+ // either case, |parsed_scheme| will tell us that this is a file URL, but |
+ // |parts->scheme| might be empty, e.g. if the user typed "C:\foo". |
return URL; |
} |
@@ -233,26 +235,24 @@ AutocompleteInput::Type AutocompleteInput::Parse( |
UNKNOWN : QUERY; |
} |
- // Presence of a port means this is likely a URL, if the port is really a port |
- // number. If it's just garbage after a colon, this is a query. |
+ // A port number is a good indicator that this is a URL. However, it might |
+ // also be a query like "1.66:1" that looks kind of like an IP address and |
+ // port number. So here we only check for "port numbers" that are illegal and |
+ // thus mean this can't be navigated to (e.g. "1.2.3.4:garbage"), and we save |
+ // handling legal port numbers until after the "IP address" determination |
+ // below. |
if (parts->port.is_nonempty()) { |
int port; |
- return (base::StringToInt(WideToUTF8( |
- text.substr(parts->port.begin, parts->port.len)), &port) && |
- (port >= 0) && (port <= 65535)) ? URL : QUERY; |
+ if (!base::StringToInt(WideToUTF8( |
+ text.substr(parts->port.begin, parts->port.len)), &port) || |
+ (port < 0) || (port > 65535)) |
+ return QUERY; |
} |
- // Presence of a username could either indicate a URL or an email address |
- // ("user@mail.com"). E-mail addresses are likely queries so we only open |
- // this as a URL if the user explicitly typed a scheme. |
- if (parts->username.is_nonempty() && parts->scheme.is_nonempty()) |
- return URL; |
- |
- // Presence of a password means this is likely a URL. Note that unless the |
- // user has typed an explicit "http://" or similar, we'll probably think that |
- // the username is some unknown scheme, and bail out in the scheme-handling |
- // code above. |
- if (parts->password.is_nonempty()) |
+ // Now that we've ruled out all schemes other than http or https and done a |
+ // little more sanity checking, the presence of a scheme means this is likely |
+ // a URL. |
+ if (parts->scheme.is_nonempty()) |
return URL; |
// See if the host is an IP address. |
@@ -263,36 +263,48 @@ AutocompleteInput::Type AutocompleteInput::Parse( |
// it, unless they explicitly typed a scheme. This is true even if the URL |
// appears to have a path: "1.2/45" is more likely a search (for the answer |
// to a math problem) than a URL. |
- if ((host_info.num_ipv4_components == 4) || parts->scheme.is_nonempty()) |
+ if (host_info.num_ipv4_components == 4) |
return URL; |
return desired_tld.empty() ? UNKNOWN : REQUESTED_URL; |
} |
if (host_info.family == url_canon::CanonHostInfo::IPV6) |
return URL; |
+ // Now that we've ruled out invalid ports and queries that look like they have |
+ // a port, the presence of a port means this is likely a URL. |
+ if (parts->port.is_nonempty()) |
+ return URL; |
+ |
+ // Presence of a password means this is likely a URL. Note that unless the |
+ // user has typed an explicit "http://" or similar, we'll probably think that |
+ // the username is some unknown scheme, and bail out in the scheme-handling |
+ // code above. |
+ if (parts->password.is_nonempty()) |
+ return URL; |
+ |
// The host doesn't look like a number, so see if the user's given us a path. |
if (parts->path.is_nonempty()) { |
// Most inputs with paths are URLs, even ones without known registries (e.g. |
- // intranet URLs). However, if the user didn't type a scheme, there's no |
- // known registry, and the path has a space, this is more likely a query |
- // with a slash in the first term (e.g. "ps/2 games") than a URL. We can |
- // still open URLs with spaces in the path by escaping the space, and we |
- // will still inline autocomplete them if users have typed them in the past, |
- // but we default to searching since that's the common case. |
- return (!parts->scheme.is_nonempty() && (registry_length == 0) && |
+ // intranet URLs). However, if there's no known registry and the path has |
+ // a space, this is more likely a query with a slash in the first term |
+ // (e.g. "ps/2 games") than a URL. We can still open URLs with spaces in |
+ // the path by escaping the space, and we will still inline autocomplete |
+ // them if users have typed them in the past, but we default to searching |
+ // since that's the common case. |
+ return ((registry_length == 0) && |
(text.substr(parts->path.begin, parts->path.len).find(' ') != |
std::wstring::npos)) ? UNKNOWN : URL; |
} |
- // If we reach here with a username, our input looks like "user@host"; this is |
- // the case mentioned above, where we think this is more likely an email |
- // address than an HTTP auth attempt, so search for it. |
+ // If we reach here with a username, our input looks like "user@host". |
+ // Because there is no scheme explicitly specified, we think this is more |
+ // likely an email address than an HTTP auth attempt. Hence, we search by |
+ // default and let users correct us on a case-by-case basis. |
if (parts->username.is_nonempty()) |
return UNKNOWN; |
- // We have a bare host string. See if it has a known TLD or the user typed a |
- // scheme. If so, it's probably a URL. |
- if (parts->scheme.is_nonempty() || (registry_length != 0)) |
+ // We have a bare host string. If it has a known TLD, it's probably a URL. |
+ if (registry_length != 0) |
return URL; |
// No TLD that we know about. This could be: |