| OLD | NEW |
| 1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "chrome/browser/autocomplete/autocomplete.h" | 5 #include "chrome/browser/autocomplete/autocomplete.h" |
| 6 | 6 |
| 7 #include <algorithm> | 7 #include <algorithm> |
| 8 | 8 |
| 9 #include "app/l10n_util.h" | 9 #include "app/l10n_util.h" |
| 10 #include "base/basictypes.h" | 10 #include "base/basictypes.h" |
| (...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 107 // to have a scheme. | 107 // to have a scheme. |
| 108 const std::wstring parsed_scheme(URLFixerUpper::SegmentURL(text, parts)); | 108 const std::wstring parsed_scheme(URLFixerUpper::SegmentURL(text, parts)); |
| 109 if (scheme) | 109 if (scheme) |
| 110 *scheme = parsed_scheme; | 110 *scheme = parsed_scheme; |
| 111 | 111 |
| 112 if (parsed_scheme == L"file") { | 112 if (parsed_scheme == L"file") { |
| 113 // A user might or might not type a scheme when entering a file URL. | 113 // A user might or might not type a scheme when entering a file URL. |
| 114 return URL; | 114 return URL; |
| 115 } | 115 } |
| 116 | 116 |
| 117 // If the user typed a scheme, determine our available actions based on that. | 117 // If the user typed a scheme, and it's HTTP or HTTPS, we know how to parse it |
| 118 if (parts->scheme.is_valid()) { | 118 // well enough that we can fall through to the heuristics below. If it's |
| 119 // something else, we can just determine our action based on what we do with |
| 120 // any input of this scheme. In theory we could do better with some schemes |
| 121 // (e.g. "ftp" or "view-source") but I'll wait to spend the effort on that |
| 122 // until I run into some cases that really need it. |
| 123 if (parts->scheme.is_nonempty() && |
| 124 (parsed_scheme != L"http") && (parsed_scheme != L"https")) { |
| 119 // See if we know how to handle the URL internally. | 125 // See if we know how to handle the URL internally. |
| 120 if (URLRequest::IsHandledProtocol(WideToASCII(parsed_scheme))) | 126 if (URLRequest::IsHandledProtocol(WideToASCII(parsed_scheme))) |
| 121 return URL; | 127 return URL; |
| 122 | 128 |
| 123 // There are also some schemes that we convert to other things before they | 129 // There are also some schemes that we convert to other things before they |
| 124 // reach the renderer or else the renderer handles internally without | 130 // reach the renderer or else the renderer handles internally without |
| 125 // reaching the URLRequest logic. We thus won't catch these above, but we | 131 // reaching the URLRequest logic. We thus won't catch these above, but we |
| 126 // should still claim to handle them. | 132 // should still claim to handle them. |
| 127 if (LowerCaseEqualsASCII(parsed_scheme, chrome::kViewSourceScheme) || | 133 if (LowerCaseEqualsASCII(parsed_scheme, chrome::kViewSourceScheme) || |
| 128 LowerCaseEqualsASCII(parsed_scheme, chrome::kJavaScriptScheme) || | 134 LowerCaseEqualsASCII(parsed_scheme, chrome::kJavaScriptScheme) || |
| (...skipping 17 matching lines...) Expand all Loading... |
| 146 default: | 152 default: |
| 147 // We don't know about this scheme. It's likely to be a search operator | 153 // We don't know about this scheme. It's likely to be a search operator |
| 148 // like "site:" or "link:". We classify it as UNKNOWN so the user has | 154 // like "site:" or "link:". We classify it as UNKNOWN so the user has |
| 149 // the option of treating it as a URL if we're wrong. | 155 // the option of treating it as a URL if we're wrong. |
| 150 // Note that SegmentURL() is smart so we aren't tricked by "c:\foo" or | 156 // Note that SegmentURL() is smart so we aren't tricked by "c:\foo" or |
| 151 // "www.example.com:81" in this case. | 157 // "www.example.com:81" in this case. |
| 152 return UNKNOWN; | 158 return UNKNOWN; |
| 153 } | 159 } |
| 154 } | 160 } |
| 155 | 161 |
| 156 // The user didn't type a scheme. Assume that this is either an HTTP URL or | 162 // Either the user didn't type a scheme, in which case we need to distinguish |
| 157 // not a URL at all; try to determine which. | 163 // between an HTTP URL and a query, or the scheme is HTTP or HTTPS, in which |
| 164 // case we should reject invalid formulations. |
| 158 | 165 |
| 159 // It's not clear that we can reach here with an empty "host" (maybe on some | 166 // If we have an empty host it can't be a URL. |
| 160 // kinds of garbage input?), but if we did, it couldn't be a URL. | |
| 161 if (!parts->host.is_nonempty()) | 167 if (!parts->host.is_nonempty()) |
| 162 return QUERY; | 168 return QUERY; |
| 163 // (We use the registry length later below but ask for it here so we can check | 169 |
| 164 // the host's validity at this point.) | 170 // Likewise, the RegistryControlledDomainService (RCDS) can reject certain obviously-invalid hosts. (We also |
| 171 // use the registry length later below.) |
| 165 const std::wstring host(text.substr(parts->host.begin, parts->host.len)); | 172 const std::wstring host(text.substr(parts->host.begin, parts->host.len)); |
| 166 const size_t registry_length = | 173 const size_t registry_length = |
| 167 net::RegistryControlledDomainService::GetRegistryLength(host, false); | 174 net::RegistryControlledDomainService::GetRegistryLength(host, false); |
| 168 if (registry_length == std::wstring::npos) | 175 if (registry_length == std::wstring::npos) |
| 169 return QUERY; // Could be a broken IP address, etc. | 176 return QUERY; // Could be a broken IP address, etc. |
| 170 | 177 |
| 171 // See if the hostname is valid per RFC 1738. While IE and GURL allow | 178 // See if the hostname is valid per RFC 1738. While IE and GURL allow |
| 172 // hostnames to contain many other characters (perhaps for weird intranet | 179 // hostnames to contain many other characters (perhaps for weird intranet |
| 173 // machines), it's extremely unlikely that a user would be trying to type | 180 // machines), it's extremely unlikely that a user would be trying to type |
| 174 // those in for anything other than a search query. | 181 // those in for anything other than a search query. |
| 175 url_canon::CanonHostInfo host_info; | 182 url_canon::CanonHostInfo host_info; |
| 176 const std::string canonicalized_host(net::CanonicalizeHost(host, &host_info)); | 183 const std::string canonicalized_host(net::CanonicalizeHost(host, &host_info)); |
| 177 if ((host_info.family == url_canon::CanonHostInfo::NEUTRAL) && | 184 if ((host_info.family == url_canon::CanonHostInfo::NEUTRAL) && |
| 178 !net::IsCanonicalizedHostRFC1738Compliant(canonicalized_host)) | 185 !net::IsCanonicalizedHostRFC1738Compliant(canonicalized_host)) |
| 179 return QUERY; | 186 return QUERY; |
| 180 | 187 |
| 181 // Presence of a port means this is likely a URL, if the port is really a port | 188 // Presence of a port means this is likely a URL, if the port is really a port |
| 182 // number. If it's just garbage after a colon, this is a query. | 189 // number. If it's just garbage after a colon, this is a query. |
| 183 if (parts->port.is_nonempty()) { | 190 if (parts->port.is_nonempty()) { |
| 184 int port; | 191 int port; |
| 185 return (StringToInt(WideToUTF16( | 192 return (StringToInt(WideToUTF16( |
| 186 text.substr(parts->port.begin, parts->port.len)), &port) && | 193 text.substr(parts->port.begin, parts->port.len)), &port) && |
| 187 (port >= 0) && (port <= 65535)) ? URL : QUERY; | 194 (port >= 0) && (port <= 65535)) ? URL : QUERY; |
| 188 } | 195 } |
| 189 | 196 |
| 190 // Presence of a password means this is likely a URL. We don't treat | 197 // Presence of a username could indicate either a URL or an email address |
| 191 // usernames (without passwords) as indicating a URL, because this could be an | 198 // ("user@mail.com"). Email addresses are likely queries, so we only open |
| 192 // email address like "user@mail.com" which is more likely a search than an | 199 // this as a URL if the user explicitly typed a scheme. |
| 193 // HTTP auth login attempt. | 200 if (parts->username.is_nonempty() && parts->scheme.is_nonempty()) |
| 201 return URL; |
| 202 |
| 203 // Presence of a password means this is likely a URL. Note that unless the |
| 204 // user has typed an explicit "http://" or similar, we'll probably think that |
| 205 // the username is some unknown scheme, and bail out in the scheme-handling |
| 206 // code above. |
| 194 if (parts->password.is_nonempty()) | 207 if (parts->password.is_nonempty()) |
| 195 return URL; | 208 return URL; |
| 196 | 209 |
| 197 // See if the host is an IP address. | 210 // See if the host is an IP address. |
| 198 if (host_info.family == url_canon::CanonHostInfo::IPV4) { | 211 if (host_info.family == url_canon::CanonHostInfo::IPV4) { |
| 199 // If the user originally typed a host that looks like an IP address (a | 212 // If the user originally typed a host that looks like an IP address (a |
| 200 // dotted quad), they probably want to open it. If the original input was | 213 // dotted quad), they probably want to open it. If the original input was |
| 201 // something else (like a single number), they probably wanted to search for | 214 // something else (like a single number), they probably wanted to search for |
| 202 // it. This is true even if the URL appears to have a path: "1.2/45" is | 215 // it, unless they explicitly typed a scheme. This is true even if the URL |
| 203 // more likely a search (for the answer to a math problem) than a URL. | 216 // appears to have a path: "1.2/45" is more likely a search (for the answer |
| 204 if (host_info.num_ipv4_components == 4) | 217 // to a math problem) than a URL. |
| 218 if ((host_info.num_ipv4_components == 4) || parts->scheme.is_nonempty()) |
| 205 return URL; | 219 return URL; |
| 206 return desired_tld.empty() ? UNKNOWN : REQUESTED_URL; | 220 return desired_tld.empty() ? UNKNOWN : REQUESTED_URL; |
| 207 } | 221 } |
| 208 if (host_info.family == url_canon::CanonHostInfo::IPV6) | 222 if (host_info.family == url_canon::CanonHostInfo::IPV6) |
| 209 return URL; | 223 return URL; |
| 210 | 224 |
| 211 // The host doesn't look like a number, so see if the user's given us a path. | 225 // The host doesn't look like a number, so see if the user's given us a path. |
| 212 if (parts->path.is_nonempty()) { | 226 if (parts->path.is_nonempty()) { |
| 213 // Most inputs with paths are URLs, even ones without known registries (e.g. | 227 // Most inputs with paths are URLs, even ones without known registries (e.g. |
| 214 // intranet URLs). However, if there's no known registry, and the path has | 228 // intranet URLs). However, if the user didn't type a scheme, there's no |
| 215 // a space, this is more likely a query with a slash in the first term (e.g. | 229 // known registry, and the path has a space, this is more likely a query |
| 216 // "ps/2 games") than a URL. We can still open URLs with spaces in the path | 230 // with a slash in the first term (e.g. "ps/2 games") than a URL. We can |
| 217 // by escaping the space, and we will still inline autocomplete them if | 231 // still open URLs with spaces in the path by escaping the space, and we |
| 218 // users have typed them in the past, but we default to searching since | 232 // will still inline autocomplete them if users have typed them in the past, |
| 219 // that's the common case. | 233 // but we default to searching since that's the common case. |
| 220 return ((registry_length == 0) && | 234 return (!parts->scheme.is_nonempty() && (registry_length == 0) && |
| 221 (text.substr(parts->path.begin, parts->path.len).find(' ') != | 235 (text.substr(parts->path.begin, parts->path.len).find(' ') != |
| 222 std::wstring::npos)) ? UNKNOWN : URL; | 236 std::wstring::npos)) ? UNKNOWN : URL; |
| 223 } | 237 } |
| 224 | 238 |
| 225 // If we reach here with a username, our input looks like "user@host"; this is | 239 // If we reach here with a username, our input looks like "user@host"; this is |
| 226 // the case mentioned above, where we think this is more likely an email | 240 // the case mentioned above, where we think this is more likely an email |
| 227 // address than an HTTP auth attempt, so search for it. | 241 // address than an HTTP auth attempt, so search for it. |
| 228 if (parts->username.is_nonempty()) | 242 if (parts->username.is_nonempty()) |
| 229 return UNKNOWN; | 243 return UNKNOWN; |
| 230 | 244 |
| 231 // We have a bare host string. See if it has a known TLD. If so, it's | 245 // We have a bare host string. See if it has a known TLD or the user typed a |
| 232 // probably a URL. | 246 // scheme. If so, it's probably a URL. |
| 233 if (registry_length != 0) | 247 if (parts->scheme.is_nonempty() || (registry_length != 0)) |
| 234 return URL; | 248 return URL; |
| 235 | 249 |
| 236 // No TLD that we know about. This could be: | 250 // No TLD that we know about. This could be: |
| 237 // * A string that the user wishes to add a desired_tld to, to get a URL. If | 251 // * A string that the user wishes to add a desired_tld to, to get a URL. If |
| 238 // we reach this point, we know there's no known TLD on the string, so the | 252 // we reach this point, we know there's no known TLD on the string, so the |
| 239 // fixup code will be willing to add one; thus this is a URL. | 253 // fixup code will be willing to add one; thus this is a URL. |
| 240 // * A single word "foo"; possibly an intranet site, but more likely a search. | 254 // * A single word "foo"; possibly an intranet site, but more likely a search. |
| 241 // This is ideally an UNKNOWN, and we can let the Alternate Nav URL code | 255 // This is ideally an UNKNOWN, and we can let the Alternate Nav URL code |
| 242 // catch our mistakes. | 256 // catch our mistakes. |
| 243 // * A URL with a valid TLD we don't know about yet. If e.g. a registrar adds | 257 // * A URL with a valid TLD we don't know about yet. If e.g. a registrar adds |
| (...skipping 683 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 927 void AutocompleteController::CheckIfDone() { | 941 void AutocompleteController::CheckIfDone() { |
| 928 for (ACProviders::const_iterator i(providers_.begin()); i != providers_.end(); | 942 for (ACProviders::const_iterator i(providers_.begin()); i != providers_.end(); |
| 929 ++i) { | 943 ++i) { |
| 930 if (!(*i)->done()) { | 944 if (!(*i)->done()) { |
| 931 done_ = false; | 945 done_ = false; |
| 932 return; | 946 return; |
| 933 } | 947 } |
| 934 } | 948 } |
| 935 done_ = true; | 949 done_ = true; |
| 936 } | 950 } |
| OLD | NEW |