Index: chrome/browser/autocomplete/autocomplete.cc |
=================================================================== |
--- chrome/browser/autocomplete/autocomplete.cc (revision 29420) |
+++ chrome/browser/autocomplete/autocomplete.cc (working copy) |
@@ -114,8 +114,14 @@ |
return URL; |
} |
- // If the user typed a scheme, determine our available actions based on that. |
- if (parts->scheme.is_valid()) { |
+ // If the user typed a scheme, and it's HTTP or HTTPS, we know how to parse it |
+ // well enough that we can fall through to the heuristics below. If it's |
+ // something else, we can just determine our action based on what we do with |
+ // any input of this scheme. In theory we could do better with some schemes |
+ // (e.g. "ftp" or "view-source") but I'll wait to spend the effort on that |
+ // until I run into some cases that really need it. |
+ if (parts->scheme.is_nonempty() && |
+ (parsed_scheme != L"http") && (parsed_scheme != L"https")) { |
// See if we know how to handle the URL internally. |
if (URLRequest::IsHandledProtocol(WideToASCII(parsed_scheme))) |
return URL; |
@@ -153,15 +159,16 @@ |
} |
} |
- // The user didn't type a scheme. Assume that this is either an HTTP URL or |
- // not a URL at all; try to determine which. |
+ // Either the user didn't type a scheme, in which case we need to distinguish |
+ // between an HTTP URL and a query, or the scheme is HTTP or HTTPS, in which |
+ // case we should reject invalid formulations. |
- // It's not clear that we can reach here with an empty "host" (maybe on some |
- // kinds of garbage input?), but if we did, it couldn't be a URL. |
+ // If we have an empty host it can't be a URL. |
if (!parts->host.is_nonempty()) |
return QUERY; |
- // (We use the registry length later below but ask for it here so we can check |
- // the host's validity at this point.) |
+ |
+ // Likewise, the RCDS can reject certain obviously-invalid hosts. (We also |
+ // use the registry length later below.) |
const std::wstring host(text.substr(parts->host.begin, parts->host.len)); |
const size_t registry_length = |
net::RegistryControlledDomainService::GetRegistryLength(host, false); |
@@ -187,10 +194,16 @@ |
(port >= 0) && (port <= 65535)) ? URL : QUERY; |
} |
- // Presence of a password means this is likely a URL. We don't treat |
- // usernames (without passwords) as indicating a URL, because this could be an |
- // email address like "user@mail.com" which is more likely a search than an |
- // HTTP auth login attempt. |
+ // Presence of a username could either indicate a URL or an email address |
+ // ("user@mail.com"). E-mail addresses are likely queries so we only open |
+ // this as a URL if the user explicitly typed a scheme. |
+ if (parts->username.is_nonempty() && parts->scheme.is_nonempty()) |
+ return URL; |
+ |
+ // Presence of a password means this is likely a URL. Note that unless the |
+ // user has typed an explicit "http://" or similar, we'll probably think that |
+ // the username is some unknown scheme, and bail out in the scheme-handling |
+ // code above. |
if (parts->password.is_nonempty()) |
return URL; |
@@ -199,9 +212,10 @@ |
// If the user originally typed a host that looks like an IP address (a |
// dotted quad), they probably want to open it. If the original input was |
// something else (like a single number), they probably wanted to search for |
- // it. This is true even if the URL appears to have a path: "1.2/45" is |
- // more likely a search (for the answer to a math problem) than a URL. |
- if (host_info.num_ipv4_components == 4) |
+ // it, unless they explicitly typed a scheme. This is true even if the URL |
+ // appears to have a path: "1.2/45" is more likely a search (for the answer |
+ // to a math problem) than a URL. |
+ if ((host_info.num_ipv4_components == 4) || parts->scheme.is_nonempty()) |
return URL; |
return desired_tld.empty() ? UNKNOWN : REQUESTED_URL; |
} |
@@ -211,13 +225,13 @@ |
// The host doesn't look like a number, so see if the user's given us a path. |
if (parts->path.is_nonempty()) { |
// Most inputs with paths are URLs, even ones without known registries (e.g. |
- // intranet URLs). However, if there's no known registry, and the path has |
- // a space, this is more likely a query with a slash in the first term (e.g. |
- // "ps/2 games") than a URL. We can still open URLs with spaces in the path |
- // by escaping the space, and we will still inline autocomplete them if |
- // users have typed them in the past, but we default to searching since |
- // that's the common case. |
- return ((registry_length == 0) && |
+ // intranet URLs). However, if the user didn't type a scheme, there's no |
+ // known registry, and the path has a space, this is more likely a query |
+ // with a slash in the first term (e.g. "ps/2 games") than a URL. We can |
+ // still open URLs with spaces in the path by escaping the space, and we |
+ // will still inline autocomplete them if users have typed them in the past, |
+ // but we default to searching since that's the common case. |
+ return (!parts->scheme.is_nonempty() && (registry_length == 0) && |
(text.substr(parts->path.begin, parts->path.len).find(' ') != |
std::wstring::npos)) ? UNKNOWN : URL; |
} |
@@ -228,9 +242,9 @@ |
if (parts->username.is_nonempty()) |
return UNKNOWN; |
- // We have a bare host string. See if it has a known TLD. If so, it's |
- // probably a URL. |
- if (registry_length != 0) |
+ // We have a bare host string. See if it has a known TLD or the user typed a |
+ // scheme. If so, it's probably a URL. |
+ if (parts->scheme.is_nonempty() || (registry_length != 0)) |
return URL; |
// No TLD that we know about. This could be: |