OLD | NEW |
1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/browser/autocomplete/autocomplete.h" | 5 #include "chrome/browser/autocomplete/autocomplete.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 | 8 |
9 #include "app/l10n_util.h" | 9 #include "app/l10n_util.h" |
10 #include "base/basictypes.h" | 10 #include "base/basictypes.h" |
(...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
107 // to have a scheme. | 107 // to have a scheme. |
108 const std::wstring parsed_scheme(URLFixerUpper::SegmentURL(text, parts)); | 108 const std::wstring parsed_scheme(URLFixerUpper::SegmentURL(text, parts)); |
109 if (scheme) | 109 if (scheme) |
110 *scheme = parsed_scheme; | 110 *scheme = parsed_scheme; |
111 | 111 |
112 if (parsed_scheme == L"file") { | 112 if (parsed_scheme == L"file") { |
113 // A user might or might not type a scheme when entering a file URL. | 113 // A user might or might not type a scheme when entering a file URL. |
114 return URL; | 114 return URL; |
115 } | 115 } |
116 | 116 |
117 // If the user typed a scheme, determine our available actions based on that. | 117 // If the user typed a scheme, and it's HTTP or HTTPS, we know how to parse it |
118 if (parts->scheme.is_valid()) { | 118 // well enough that we can fall through to the heuristics below. If it's |
| 119 // something else, we can just determine our action based on what we do with |
| 120 // any input of this scheme. In theory we could do better with some schemes |
| 121 // (e.g. "ftp" or "view-source") but I'll wait to spend the effort on that |
| 122 // until I run into some cases that really need it. |
| 123 if (parts->scheme.is_nonempty() && |
| 124 (parsed_scheme != L"http") && (parsed_scheme != L"https")) { |
119 // See if we know how to handle the URL internally. | 125 // See if we know how to handle the URL internally. |
120 if (URLRequest::IsHandledProtocol(WideToASCII(parsed_scheme))) | 126 if (URLRequest::IsHandledProtocol(WideToASCII(parsed_scheme))) |
121 return URL; | 127 return URL; |
122 | 128 |
123 // There are also some schemes that we convert to other things before they | 129 // There are also some schemes that we convert to other things before they |
124 // reach the renderer or else the renderer handles internally without | 130 // reach the renderer or else the renderer handles internally without |
125 // reaching the URLRequest logic. We thus won't catch these above, but we | 131 // reaching the URLRequest logic. We thus won't catch these above, but we |
126 // should still claim to handle them. | 132 // should still claim to handle them. |
127 if (LowerCaseEqualsASCII(parsed_scheme, chrome::kViewSourceScheme) || | 133 if (LowerCaseEqualsASCII(parsed_scheme, chrome::kViewSourceScheme) || |
128 LowerCaseEqualsASCII(parsed_scheme, chrome::kJavaScriptScheme) || | 134 LowerCaseEqualsASCII(parsed_scheme, chrome::kJavaScriptScheme) || |
(...skipping 17 matching lines...) Expand all Loading... |
146 default: | 152 default: |
147 // We don't know about this scheme. It's likely to be a search operator | 153 // We don't know about this scheme. It's likely to be a search operator |
148 // like "site:" or "link:". We classify it as UNKNOWN so the user has | 154 // like "site:" or "link:". We classify it as UNKNOWN so the user has |
149 // the option of treating it as a URL if we're wrong. | 155 // the option of treating it as a URL if we're wrong. |
150 // Note that SegmentURL() is smart so we aren't tricked by "c:\foo" or | 156 // Note that SegmentURL() is smart so we aren't tricked by "c:\foo" or |
151 // "www.example.com:81" in this case. | 157 // "www.example.com:81" in this case. |
152 return UNKNOWN; | 158 return UNKNOWN; |
153 } | 159 } |
154 } | 160 } |
155 | 161 |
156 // The user didn't type a scheme. Assume that this is either an HTTP URL or | 162 // Either the user didn't type a scheme, in which case we need to distinguish |
157 // not a URL at all; try to determine which. | 163 // between an HTTP URL and a query, or the scheme is HTTP or HTTPS, in which |
| 164 // case we should reject invalid formulations. |
158 | 165 |
159 // It's not clear that we can reach here with an empty "host" (maybe on some | 166 // If we have an empty host it can't be a URL. |
160 // kinds of garbage input?), but if we did, it couldn't be a URL. | |
161 if (!parts->host.is_nonempty()) | 167 if (!parts->host.is_nonempty()) |
162 return QUERY; | 168 return QUERY; |
163 // (We use the registry length later below but ask for it here so we can check | 169 |
164 // the host's validity at this point.) | 170 // Likewise, the RCDS can reject certain obviously-invalid hosts. (We also |
| 171 // use the registry length later below.) |
165 const std::wstring host(text.substr(parts->host.begin, parts->host.len)); | 172 const std::wstring host(text.substr(parts->host.begin, parts->host.len)); |
166 const size_t registry_length = | 173 const size_t registry_length = |
167 net::RegistryControlledDomainService::GetRegistryLength(host, false); | 174 net::RegistryControlledDomainService::GetRegistryLength(host, false); |
168 if (registry_length == std::wstring::npos) | 175 if (registry_length == std::wstring::npos) |
169 return QUERY; // Could be a broken IP address, etc. | 176 return QUERY; // Could be a broken IP address, etc. |
170 | 177 |
171 // See if the hostname is valid per RFC 1738. While IE and GURL allow | 178 // See if the hostname is valid per RFC 1738. While IE and GURL allow |
172 // hostnames to contain many other characters (perhaps for weird intranet | 179 // hostnames to contain many other characters (perhaps for weird intranet |
173 // machines), it's extremely unlikely that a user would be trying to type | 180 // machines), it's extremely unlikely that a user would be trying to type |
174 // those in for anything other than a search query. | 181 // those in for anything other than a search query. |
175 url_canon::CanonHostInfo host_info; | 182 url_canon::CanonHostInfo host_info; |
176 const std::string canonicalized_host(net::CanonicalizeHost(host, &host_info)); | 183 const std::string canonicalized_host(net::CanonicalizeHost(host, &host_info)); |
177 if ((host_info.family == url_canon::CanonHostInfo::NEUTRAL) && | 184 if ((host_info.family == url_canon::CanonHostInfo::NEUTRAL) && |
178 !net::IsCanonicalizedHostRFC1738Compliant(canonicalized_host)) | 185 !net::IsCanonicalizedHostRFC1738Compliant(canonicalized_host)) |
179 return QUERY; | 186 return QUERY; |
180 | 187 |
181 // Presence of a port means this is likely a URL, if the port is really a port | 188 // Presence of a port means this is likely a URL, if the port is really a port |
182 // number. If it's just garbage after a colon, this is a query. | 189 // number. If it's just garbage after a colon, this is a query. |
183 if (parts->port.is_nonempty()) { | 190 if (parts->port.is_nonempty()) { |
184 int port; | 191 int port; |
185 return (StringToInt(WideToUTF16( | 192 return (StringToInt(WideToUTF16( |
186 text.substr(parts->port.begin, parts->port.len)), &port) && | 193 text.substr(parts->port.begin, parts->port.len)), &port) && |
187 (port >= 0) && (port <= 65535)) ? URL : QUERY; | 194 (port >= 0) && (port <= 65535)) ? URL : QUERY; |
188 } | 195 } |
189 | 196 |
190 // Presence of a password means this is likely a URL. We don't treat | 197 // Presence of a username could either indicate a URL or an email address |
191 // usernames (without passwords) as indicating a URL, because this could be an | 198 // ("user@mail.com"). E-mail addresses are likely queries so we only open |
192 // email address like "user@mail.com" which is more likely a search than an | 199 // this as a URL if the user explicitly typed a scheme. |
193 // HTTP auth login attempt. | 200 if (parts->username.is_nonempty() && parts->scheme.is_nonempty()) |
| 201 return URL; |
| 202 |
| 203 // Presence of a password means this is likely a URL. Note that unless the |
| 204 // user has typed an explicit "http://" or similar, we'll probably think that |
| 205 // the username is some unknown scheme, and bail out in the scheme-handling |
| 206 // code above. |
194 if (parts->password.is_nonempty()) | 207 if (parts->password.is_nonempty()) |
195 return URL; | 208 return URL; |
196 | 209 |
197 // See if the host is an IP address. | 210 // See if the host is an IP address. |
198 if (host_info.family == url_canon::CanonHostInfo::IPV4) { | 211 if (host_info.family == url_canon::CanonHostInfo::IPV4) { |
199 // If the user originally typed a host that looks like an IP address (a | 212 // If the user originally typed a host that looks like an IP address (a |
200 // dotted quad), they probably want to open it. If the original input was | 213 // dotted quad), they probably want to open it. If the original input was |
201 // something else (like a single number), they probably wanted to search for | 214 // something else (like a single number), they probably wanted to search for |
202 // it. This is true even if the URL appears to have a path: "1.2/45" is | 215 // it, unless they explicitly typed a scheme. This is true even if the URL |
203 // more likely a search (for the answer to a math problem) than a URL. | 216 // appears to have a path: "1.2/45" is more likely a search (for the answer |
204 if (host_info.num_ipv4_components == 4) | 217 // to a math problem) than a URL. |
| 218 if ((host_info.num_ipv4_components == 4) || parts->scheme.is_nonempty()) |
205 return URL; | 219 return URL; |
206 return desired_tld.empty() ? UNKNOWN : REQUESTED_URL; | 220 return desired_tld.empty() ? UNKNOWN : REQUESTED_URL; |
207 } | 221 } |
208 if (host_info.family == url_canon::CanonHostInfo::IPV6) | 222 if (host_info.family == url_canon::CanonHostInfo::IPV6) |
209 return URL; | 223 return URL; |
210 | 224 |
211 // The host doesn't look like a number, so see if the user's given us a path. | 225 // The host doesn't look like a number, so see if the user's given us a path. |
212 if (parts->path.is_nonempty()) { | 226 if (parts->path.is_nonempty()) { |
213 // Most inputs with paths are URLs, even ones without known registries (e.g. | 227 // Most inputs with paths are URLs, even ones without known registries (e.g. |
214 // intranet URLs). However, if there's no known registry, and the path has | 228 // intranet URLs). However, if the user didn't type a scheme, there's no |
215 // a space, this is more likely a query with a slash in the first term (e.g. | 229 // known registry, and the path has a space, this is more likely a query |
216 // "ps/2 games") than a URL. We can still open URLs with spaces in the path | 230 // with a slash in the first term (e.g. "ps/2 games") than a URL. We can |
217 // by escaping the space, and we will still inline autocomplete them if | 231 // still open URLs with spaces in the path by escaping the space, and we |
218 // users have typed them in the past, but we default to searching since | 232 // will still inline autocomplete them if users have typed them in the past, |
219 // that's the common case. | 233 // but we default to searching since that's the common case. |
220 return ((registry_length == 0) && | 234 return (!parts->scheme.is_nonempty() && (registry_length == 0) && |
221 (text.substr(parts->path.begin, parts->path.len).find(' ') != | 235 (text.substr(parts->path.begin, parts->path.len).find(' ') != |
222 std::wstring::npos)) ? UNKNOWN : URL; | 236 std::wstring::npos)) ? UNKNOWN : URL; |
223 } | 237 } |
224 | 238 |
225 // If we reach here with a username, our input looks like "user@host"; this is | 239 // If we reach here with a username, our input looks like "user@host"; this is |
226 // the case mentioned above, where we think this is more likely an email | 240 // the case mentioned above, where we think this is more likely an email |
227 // address than an HTTP auth attempt, so search for it. | 241 // address than an HTTP auth attempt, so search for it. |
228 if (parts->username.is_nonempty()) | 242 if (parts->username.is_nonempty()) |
229 return UNKNOWN; | 243 return UNKNOWN; |
230 | 244 |
231 // We have a bare host string. See if it has a known TLD. If so, it's | 245 // We have a bare host string. See if it has a known TLD or the user typed a |
232 // probably a URL. | 246 // scheme. If so, it's probably a URL. |
233 if (registry_length != 0) | 247 if (parts->scheme.is_nonempty() || (registry_length != 0)) |
234 return URL; | 248 return URL; |
235 | 249 |
236 // No TLD that we know about. This could be: | 250 // No TLD that we know about. This could be: |
237 // * A string that the user wishes to add a desired_tld to to get a URL. If | 251 // * A string that the user wishes to add a desired_tld to to get a URL. If |
238 // we reach this point, we know there's no known TLD on the string, so the | 252 // we reach this point, we know there's no known TLD on the string, so the |
239 // fixup code will be willing to add one; thus this is a URL. | 253 // fixup code will be willing to add one; thus this is a URL. |
240 // * A single word "foo"; possibly an intranet site, but more likely a search. | 254 // * A single word "foo"; possibly an intranet site, but more likely a search. |
241 // This is ideally an UNKNOWN, and we can let the Alternate Nav URL code | 255 // This is ideally an UNKNOWN, and we can let the Alternate Nav URL code |
242 // catch our mistakes. | 256 // catch our mistakes. |
243 // * A URL with a valid TLD we don't know about yet. If e.g. a registrar adds | 257 // * A URL with a valid TLD we don't know about yet. If e.g. a registrar adds |
(...skipping 683 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
927 void AutocompleteController::CheckIfDone() { | 941 void AutocompleteController::CheckIfDone() { |
928 for (ACProviders::const_iterator i(providers_.begin()); i != providers_.end(); | 942 for (ACProviders::const_iterator i(providers_.begin()); i != providers_.end(); |
929 ++i) { | 943 ++i) { |
930 if (!(*i)->done()) { | 944 if (!(*i)->done()) { |
931 done_ = false; | 945 done_ = false; |
932 return; | 946 return; |
933 } | 947 } |
934 } | 948 } |
935 done_ = true; | 949 done_ = true; |
936 } | 950 } |
OLD | NEW |