OLD | NEW |
---|---|
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/browser/autocomplete/autocomplete.h" | 5 #include "chrome/browser/autocomplete/autocomplete.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 | 8 |
9 #include "app/l10n_util.h" | 9 #include "app/l10n_util.h" |
10 #include "base/basictypes.h" | 10 #include "base/basictypes.h" |
(...skipping 105 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
116 // consider a scheme. For example, we shouldn't consider www.google.com:80 | 116 // consider a scheme. For example, we shouldn't consider www.google.com:80 |
117 // to have a scheme. | 117 // to have a scheme. |
118 url_parse::Parsed local_parts; | 118 url_parse::Parsed local_parts; |
119 if (!parts) | 119 if (!parts) |
120 parts = &local_parts; | 120 parts = &local_parts; |
121 const std::wstring parsed_scheme(URLFixerUpper::SegmentURL(text, parts)); | 121 const std::wstring parsed_scheme(URLFixerUpper::SegmentURL(text, parts)); |
122 if (scheme) | 122 if (scheme) |
123 *scheme = parsed_scheme; | 123 *scheme = parsed_scheme; |
124 | 124 |
125 if (parsed_scheme == L"file") { | 125 if (parsed_scheme == L"file") { |
126 // A user might or might not type a scheme when entering a file URL. | 126 // A user might or might not type a scheme when entering a file URL. In |
Peter Kasting
2010/08/04 17:54:13
Nit: Thanks for the comment update, it's a lot clearer…
| |
127 // either case, |parsed_scheme| will tell us that this is a file URL, but | |
128 // |parts->scheme| might be empty, e.g. if the user typed "C:\foo". | |
127 return URL; | 129 return URL; |
128 } | 130 } |
129 | 131 |
130 // If the user typed a scheme, and it's HTTP or HTTPS, we know how to parse it | 132 // If the user typed a scheme, and it's HTTP or HTTPS, we know how to parse it |
131 // well enough that we can fall through to the heuristics below. If it's | 133 // well enough that we can fall through to the heuristics below. If it's |
132 // something else, we can just determine our action based on what we do with | 134 // something else, we can just determine our action based on what we do with |
133 // any input of this scheme. In theory we could do better with some schemes | 135 // any input of this scheme. In theory we could do better with some schemes |
134 // (e.g. "ftp" or "view-source") but I'll wait to spend the effort on that | 136 // (e.g. "ftp" or "view-source") but I'll wait to spend the effort on that |
135 // until I run into some cases that really need it. | 137 // until I run into some cases that really need it. |
136 if (parts->scheme.is_nonempty() && | 138 if (parts->scheme.is_nonempty() && |
(...skipping 89 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
226 // Thus we fall down in the following cases: | 228 // Thus we fall down in the following cases: |
227 // * Trying to navigate to a hostname with spaces | 229 // * Trying to navigate to a hostname with spaces |
228 // * Trying to navigate to a hostname with invalid characters and an unknown | 230 // * Trying to navigate to a hostname with invalid characters and an unknown |
229 // TLD | 231 // TLD |
230 // These are rare, though probably possible in intranets. | 232 // These are rare, though probably possible in intranets. |
231 return (parts->scheme.is_nonempty() || | 233 return (parts->scheme.is_nonempty() || |
232 ((registry_length != 0) && (host.find(' ') == std::wstring::npos))) ? | 234 ((registry_length != 0) && (host.find(' ') == std::wstring::npos))) ? |
233 UNKNOWN : QUERY; | 235 UNKNOWN : QUERY; |
234 } | 236 } |
235 | 237 |
236 // Presence of a port means this is likely a URL, if the port is really a port | 238 // A port number is a good indicator that this is a URL. However, it might |
237 // number. If it's just garbage after a colon, this is a query. | 239 // also be a query like "1.66:1" that looks kind of like an IP address and |
240 // port number. So here we only check for "port numbers" that are illegal and | |
241 // thus mean this can't be navigated to (e.g. "1.2.3.4:garbage"), and we save | |
242 // handling legal port numbers until after the "IP address" determination | |
243 // below. | |
238 if (parts->port.is_nonempty()) { | 244 if (parts->port.is_nonempty()) { |
239 int port; | 245 int port; |
240 return (base::StringToInt(WideToUTF8( | 246 if (!base::StringToInt(WideToUTF8( |
241 text.substr(parts->port.begin, parts->port.len)), &port) && | 247 text.substr(parts->port.begin, parts->port.len)), &port) || |
242 (port >= 0) && (port <= 65535)) ? URL : QUERY; | 248 (port < 0) || (port > 65535)) |
249 return QUERY; | |
243 } | 250 } |
244 | 251 |
245 // Presence of a username could either indicate a URL or an email address | 252 // Now that we've ruled out all schemes other than http or https and done a |
246 // ("user@mail.com"). E-mail addresses are likely queries so we only open | 253 // little more sanity checking, the presence of a scheme means this is likely |
247 // this as a URL if the user explicitly typed a scheme. | 254 // a URL. |
248 if (parts->username.is_nonempty() && parts->scheme.is_nonempty()) | 255 if (parts->scheme.is_nonempty()) |
249 return URL; | |
250 | |
251 // Presence of a password means this is likely a URL. Note that unless the | |
252 // user has typed an explicit "http://" or similar, we'll probably think that | |
253 // the username is some unknown scheme, and bail out in the scheme-handling | |
254 // code above. | |
255 if (parts->password.is_nonempty()) | |
256 return URL; | 256 return URL; |
257 | 257 |
258 // See if the host is an IP address. | 258 // See if the host is an IP address. |
259 if (host_info.family == url_canon::CanonHostInfo::IPV4) { | 259 if (host_info.family == url_canon::CanonHostInfo::IPV4) { |
260 // If the user originally typed a host that looks like an IP address (a | 260 // If the user originally typed a host that looks like an IP address (a |
261 // dotted quad), they probably want to open it. If the original input was | 261 // dotted quad), they probably want to open it. If the original input was |
262 // something else (like a single number), they probably wanted to search for | 262 // something else (like a single number), they probably wanted to search for |
263 // it, unless they explicitly typed a scheme. This is true even if the URL | 263 // it, unless they explicitly typed a scheme. This is true even if the URL |
264 // appears to have a path: "1.2/45" is more likely a search (for the answer | 264 // appears to have a path: "1.2/45" is more likely a search (for the answer |
265 // to a math problem) than a URL. | 265 // to a math problem) than a URL. |
266 if ((host_info.num_ipv4_components == 4) || parts->scheme.is_nonempty()) | 266 if (host_info.num_ipv4_components == 4) |
267 return URL; | 267 return URL; |
268 return desired_tld.empty() ? UNKNOWN : REQUESTED_URL; | 268 return desired_tld.empty() ? UNKNOWN : REQUESTED_URL; |
269 } | 269 } |
270 if (host_info.family == url_canon::CanonHostInfo::IPV6) | 270 if (host_info.family == url_canon::CanonHostInfo::IPV6) |
271 return URL; | 271 return URL; |
272 | 272 |
273 // Now that we've ruled out invalid ports and queries that look like they have | |
274 // a port, the presence of a port means this is likely a URL. | |
275 if (parts->port.is_nonempty()) | |
276 return URL; | |
277 | |
278 // Presence of a password means this is likely a URL. Note that unless the | |
279 // user has typed an explicit "http://" or similar, we'll probably think that | |
280 // the username is some unknown scheme, and bail out in the scheme-handling | |
281 // code above. | |
282 if (parts->password.is_nonempty()) | |
283 return URL; | |
284 | |
273 // The host doesn't look like a number, so see if the user's given us a path. | 285 // The host doesn't look like a number, so see if the user's given us a path. |
274 if (parts->path.is_nonempty()) { | 286 if (parts->path.is_nonempty()) { |
275 // Most inputs with paths are URLs, even ones without known registries (e.g. | 287 // Most inputs with paths are URLs, even ones without known registries (e.g. |
276 // intranet URLs). However, if the user didn't type a scheme, there's no | 288 // intranet URLs). However, if there's no known registry and the path has |
277 // known registry, and the path has a space, this is more likely a query | 289 // a space, this is more likely a query with a slash in the first term |
278 // with a slash in the first term (e.g. "ps/2 games") than a URL. We can | 290 // (e.g. "ps/2 games") than a URL. We can still open URLs with spaces in |
279 // still open URLs with spaces in the path by escaping the space, and we | 291 // the path by escaping the space, and we will still inline autocomplete |
280 // will still inline autocomplete them if users have typed them in the past, | 292 // them if users have typed them in the past, but we default to searching |
281 // but we default to searching since that's the common case. | 293 // since that's the common case. |
282 return (!parts->scheme.is_nonempty() && (registry_length == 0) && | 294 return ((registry_length == 0) && |
283 (text.substr(parts->path.begin, parts->path.len).find(' ') != | 295 (text.substr(parts->path.begin, parts->path.len).find(' ') != |
284 std::wstring::npos)) ? UNKNOWN : URL; | 296 std::wstring::npos)) ? UNKNOWN : URL; |
285 } | 297 } |
286 | 298 |
287 // If we reach here with a username, our input looks like "user@host"; this is | 299 // If we reach here with a username, our input looks like "user@host". |
288 // the case mentioned above, where we think this is more likely an email | 300 // Because there is no scheme explicitly specified, we think this is more |
289 // address than an HTTP auth attempt, so search for it. | 301 // likely an email address than an HTTP auth attempt. Hence, we search by |
302 // default and let users correct us on a case-by-case basis. | |
290 if (parts->username.is_nonempty()) | 303 if (parts->username.is_nonempty()) |
291 return UNKNOWN; | 304 return UNKNOWN; |
292 | 305 |
293 // We have a bare host string. See if it has a known TLD or the user typed a | 306 // We have a bare host string. If it has a known TLD, it's probably a URL. |
294 // scheme. If so, it's probably a URL. | 307 if (registry_length != 0) |
295 if (parts->scheme.is_nonempty() || (registry_length != 0)) | |
296 return URL; | 308 return URL; |
297 | 309 |
298 // No TLD that we know about. This could be: | 310 // No TLD that we know about. This could be: |
299 // * A string that the user wishes to add a desired_tld to to get a URL. If | 311 // * A string that the user wishes to add a desired_tld to to get a URL. If |
300 // we reach this point, we know there's no known TLD on the string, so the | 312 // we reach this point, we know there's no known TLD on the string, so the |
301 // fixup code will be willing to add one; thus this is a URL. | 313 // fixup code will be willing to add one; thus this is a URL. |
302 // * A single word "foo"; possibly an intranet site, but more likely a search. | 314 // * A single word "foo"; possibly an intranet site, but more likely a search. |
303 // This is ideally an UNKNOWN, and we can let the Alternate Nav URL code | 315 // This is ideally an UNKNOWN, and we can let the Alternate Nav URL code |
304 // catch our mistakes. | 316 // catch our mistakes. |
305 // * A URL with a valid TLD we don't know about yet. If e.g. a registrar adds | 317 // * A URL with a valid TLD we don't know about yet. If e.g. a registrar adds |
(...skipping 759 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1065 void AutocompleteController::CheckIfDone() { | 1077 void AutocompleteController::CheckIfDone() { |
1066 for (ACProviders::const_iterator i(providers_.begin()); i != providers_.end(); | 1078 for (ACProviders::const_iterator i(providers_.begin()); i != providers_.end(); |
1067 ++i) { | 1079 ++i) { |
1068 if (!(*i)->done()) { | 1080 if (!(*i)->done()) { |
1069 done_ = false; | 1081 done_ = false; |
1070 return; | 1082 return; |
1071 } | 1083 } |
1072 } | 1084 } |
1073 done_ = true; | 1085 done_ = true; |
1074 } | 1086 } |
OLD | NEW |