Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "chrome/browser/autocomplete/autocomplete.h" | 5 #include "chrome/browser/autocomplete/autocomplete.h" |
| 6 | 6 |
| 7 #include <algorithm> | 7 #include <algorithm> |
| 8 | 8 |
| 9 #include "app/l10n_util.h" | 9 #include "app/l10n_util.h" |
| 10 #include "base/basictypes.h" | 10 #include "base/basictypes.h" |
| (...skipping 105 matching lines...) | (...skipping 105 matching lines...) |
| 116 // consider a scheme. For example, we shouldn't consider www.google.com:80 | 116 // consider a scheme. For example, we shouldn't consider www.google.com:80 |
| 117 // to have a scheme. | 117 // to have a scheme. |
| 118 url_parse::Parsed local_parts; | 118 url_parse::Parsed local_parts; |
| 119 if (!parts) | 119 if (!parts) |
| 120 parts = &local_parts; | 120 parts = &local_parts; |
| 121 const std::wstring parsed_scheme(URLFixerUpper::SegmentURL(text, parts)); | 121 const std::wstring parsed_scheme(URLFixerUpper::SegmentURL(text, parts)); |
| 122 if (scheme) | 122 if (scheme) |
| 123 *scheme = parsed_scheme; | 123 *scheme = parsed_scheme; |
| 124 | 124 |
| 125 if (parsed_scheme == L"file") { | 125 if (parsed_scheme == L"file") { |
| 126 // A user might or might not type a scheme when entering a file URL. | 126 // A user might or might not type a scheme when entering a file URL. In |
| | Review comment (Peter Kasting, 2010/08/04 17:54:13): Nit: Thanks for the comment update, it's a lot cle… [comment truncated in source] |
| | 127 // either case, \|parsed_scheme\| will tell us that this is a file URL, but |
| | 128 // \|parts->scheme\| might be empty, e.g. if the user typed "C:\foo". |
| 127 return URL; | 129 return URL; |
| 128 } | 130 } |
| 129 | 131 |
| 130 // If the user typed a scheme, and it's HTTP or HTTPS, we know how to parse it | 132 // If the user typed a scheme, and it's HTTP or HTTPS, we know how to parse it |
| 131 // well enough that we can fall through to the heuristics below. If it's | 133 // well enough that we can fall through to the heuristics below. If it's |
| 132 // something else, we can just determine our action based on what we do with | 134 // something else, we can just determine our action based on what we do with |
| 133 // any input of this scheme. In theory we could do better with some schemes | 135 // any input of this scheme. In theory we could do better with some schemes |
| 134 // (e.g. "ftp" or "view-source") but I'll wait to spend the effort on that | 136 // (e.g. "ftp" or "view-source") but I'll wait to spend the effort on that |
| 135 // until I run into some cases that really need it. | 137 // until I run into some cases that really need it. |
| 136 if (parts->scheme.is_nonempty() && | 138 if (parts->scheme.is_nonempty() && |
| (...skipping 89 matching lines...) | (...skipping 89 matching lines...) |
| 226 // Thus we fall down in the following cases: | 228 // Thus we fall down in the following cases: |
| 227 // * Trying to navigate to a hostname with spaces | 229 // * Trying to navigate to a hostname with spaces |
| 228 // * Trying to navigate to a hostname with invalid characters and an unknown | 230 // * Trying to navigate to a hostname with invalid characters and an unknown |
| 229 // TLD | 231 // TLD |
| 230 // These are rare, though probably possible in intranets. | 232 // These are rare, though probably possible in intranets. |
| 231 return (parts->scheme.is_nonempty() || | 233 return (parts->scheme.is_nonempty() || |
| 232 ((registry_length != 0) && (host.find(' ') == std::wstring::npos))) ? | 234 ((registry_length != 0) && (host.find(' ') == std::wstring::npos))) ? |
| 233 UNKNOWN : QUERY; | 235 UNKNOWN : QUERY; |
| 234 } | 236 } |
| 235 | 237 |
| 236 // Presence of a port means this is likely a URL, if the port is really a port | 238 // A port number is a good indicator that this is a URL. However, it might |
| 237 // number. If it's just garbage after a colon, this is a query. | 239 // also be a query like "1.66:1" that looks kind of like an IP address and |
| | 240 // port number. So here we only check for "port numbers" that are illegal and |
| | 241 // thus mean this can't be navigated to (e.g. "1.2.3.4:garbage"), and we save |
| | 242 // handling legal port numbers until after the "IP address" determination |
| | 243 // below. |
| 238 if (parts->port.is_nonempty()) { | 244 if (parts->port.is_nonempty()) { |
| 239 int port; | 245 int port; |
| 240 return (base::StringToInt(WideToUTF8( | 246 if (!base::StringToInt(WideToUTF8( |
| 241 text.substr(parts->port.begin, parts->port.len)), &port) && | 247 text.substr(parts->port.begin, parts->port.len)), &port) || |
| 242 (port >= 0) && (port <= 65535)) ? URL : QUERY; | 248 (port < 0) || (port > 65535)) |
| | 249 return QUERY; |
| 243 } | 250 } |
| 244 | 251 |
| 245 // Presence of a username could either indicate a URL or an email address | 252 // Now that we've ruled out all schemes other than http or https and done a |
| 246 // ("user@mail.com"). E-mail addresses are likely queries so we only open | 253 // little more sanity checking, the presence of a scheme means this is likely |
| 247 // this as a URL if the user explicitly typed a scheme. | 254 // a URL. |
| 248 if (parts->username.is_nonempty() && parts->scheme.is_nonempty()) | 255 if (parts->scheme.is_nonempty()) |
| 249 return URL; | |
| 250 | |
| 251 // Presence of a password means this is likely a URL. Note that unless the | |
| 252 // user has typed an explicit "http://" or similar, we'll probably think that | |
| 253 // the username is some unknown scheme, and bail out in the scheme-handling | |
| 254 // code above. | |
| 255 if (parts->password.is_nonempty()) | |
| 256 return URL; | 256 return URL; |
| 257 | 257 |
| 258 // See if the host is an IP address. | 258 // See if the host is an IP address. |
| 259 if (host_info.family == url_canon::CanonHostInfo::IPV4) { | 259 if (host_info.family == url_canon::CanonHostInfo::IPV4) { |
| 260 // If the user originally typed a host that looks like an IP address (a | 260 // If the user originally typed a host that looks like an IP address (a |
| 261 // dotted quad), they probably want to open it. If the original input was | 261 // dotted quad), they probably want to open it. If the original input was |
| 262 // something else (like a single number), they probably wanted to search for | 262 // something else (like a single number), they probably wanted to search for |
| 263 // it, unless they explicitly typed a scheme. This is true even if the URL | 263 // it, unless they explicitly typed a scheme. This is true even if the URL |
| 264 // appears to have a path: "1.2/45" is more likely a search (for the answer | 264 // appears to have a path: "1.2/45" is more likely a search (for the answer |
| 265 // to a math problem) than a URL. | 265 // to a math problem) than a URL. |
| 266 if ((host_info.num_ipv4_components == 4) || parts->scheme.is_nonempty()) | 266 if (host_info.num_ipv4_components == 4) |
| 267 return URL; | 267 return URL; |
| 268 return desired_tld.empty() ? UNKNOWN : REQUESTED_URL; | 268 return desired_tld.empty() ? UNKNOWN : REQUESTED_URL; |
| 269 } | 269 } |
| 270 if (host_info.family == url_canon::CanonHostInfo::IPV6) | 270 if (host_info.family == url_canon::CanonHostInfo::IPV6) |
| 271 return URL; | 271 return URL; |
| 272 | 272 |
| | 273 // Now that we've ruled out invalid ports and queries that look like they have |
| | 274 // a port, the presence of a port means this is likely a URL. |
| | 275 if (parts->port.is_nonempty()) |
| | 276 return URL; |
| | 277 |
| | 278 // Presence of a password means this is likely a URL. Note that unless the |
| | 279 // user has typed an explicit "http://" or similar, we'll probably think that |
| | 280 // the username is some unknown scheme, and bail out in the scheme-handling |
| | 281 // code above. |
| | 282 if (parts->password.is_nonempty()) |
| | 283 return URL; |
| | 284 |
| 273 // The host doesn't look like a number, so see if the user's given us a path. | 285 // The host doesn't look like a number, so see if the user's given us a path. |
| 274 if (parts->path.is_nonempty()) { | 286 if (parts->path.is_nonempty()) { |
| 275 // Most inputs with paths are URLs, even ones without known registries (e.g. | 287 // Most inputs with paths are URLs, even ones without known registries (e.g. |
| 276 // intranet URLs). However, if the user didn't type a scheme, there's no | 288 // intranet URLs). However, if there's no known registry and the path has |
| 277 // known registry, and the path has a space, this is more likely a query | 289 // a space, this is more likely a query with a slash in the first term |
| 278 // with a slash in the first term (e.g. "ps/2 games") than a URL. We can | 290 // (e.g. "ps/2 games") than a URL. We can still open URLs with spaces in |
| 279 // still open URLs with spaces in the path by escaping the space, and we | 291 // the path by escaping the space, and we will still inline autocomplete |
| 280 // will still inline autocomplete them if users have typed them in the past, | 292 // them if users have typed them in the past, but we default to searching |
| 281 // but we default to searching since that's the common case. | 293 // since that's the common case. |
| 282 return (!parts->scheme.is_nonempty() && (registry_length == 0) && | 294 return ((registry_length == 0) && |
| 283 (text.substr(parts->path.begin, parts->path.len).find(' ') != | 295 (text.substr(parts->path.begin, parts->path.len).find(' ') != |
| 284 std::wstring::npos)) ? UNKNOWN : URL; | 296 std::wstring::npos)) ? UNKNOWN : URL; |
| 285 } | 297 } |
| 286 | 298 |
| 287 // If we reach here with a username, our input looks like "user@host"; this is | 299 // If we reach here with a username, our input looks like "user@host". |
| 288 // the case mentioned above, where we think this is more likely an email | 300 // Because there is no scheme explicitly specified, we think this is more |
| 289 // address than an HTTP auth attempt, so search for it. | 301 // likely an email address than an HTTP auth attempt. Hence, we search by |
| | 302 // default and let users correct us on a case-by-case basis. |
| 290 if (parts->username.is_nonempty()) | 303 if (parts->username.is_nonempty()) |
| 291 return UNKNOWN; | 304 return UNKNOWN; |
| 292 | 305 |
| 293 // We have a bare host string. See if it has a known TLD or the user typed a | 306 // We have a bare host string. If it has a known TLD, it's probably a URL. |
| 294 // scheme. If so, it's probably a URL. | 307 if (registry_length != 0) |
| 295 if (parts->scheme.is_nonempty() || (registry_length != 0)) | |
| 296 return URL; | 308 return URL; |
| 297 | 309 |
| 298 // No TLD that we know about. This could be: | 310 // No TLD that we know about. This could be: |
| 299 // * A string that the user wishes to add a desired_tld to to get a URL. If | 311 // * A string that the user wishes to add a desired_tld to to get a URL. If |
| 300 // we reach this point, we know there's no known TLD on the string, so the | 312 // we reach this point, we know there's no known TLD on the string, so the |
| 301 // fixup code will be willing to add one; thus this is a URL. | 313 // fixup code will be willing to add one; thus this is a URL. |
| 302 // * A single word "foo"; possibly an intranet site, but more likely a search. | 314 // * A single word "foo"; possibly an intranet site, but more likely a search. |
| 303 // This is ideally an UNKNOWN, and we can let the Alternate Nav URL code | 315 // This is ideally an UNKNOWN, and we can let the Alternate Nav URL code |
| 304 // catch our mistakes. | 316 // catch our mistakes. |
| 305 // * A URL with a valid TLD we don't know about yet. If e.g. a registrar adds | 317 // * A URL with a valid TLD we don't know about yet. If e.g. a registrar adds |
| (...skipping 759 matching lines...) | (...skipping 759 matching lines...) |
| 1065 void AutocompleteController::CheckIfDone() { | 1077 void AutocompleteController::CheckIfDone() { |
| 1066 for (ACProviders::const_iterator i(providers_.begin()); i != providers_.end(); | 1078 for (ACProviders::const_iterator i(providers_.begin()); i != providers_.end(); |
| 1067 ++i) { | 1079 ++i) { |
| 1068 if (!(*i)->done()) { | 1080 if (!(*i)->done()) { |
| 1069 done_ = false; | 1081 done_ = false; |
| 1070 return; | 1082 return; |
| 1071 } | 1083 } |
| 1072 } | 1084 } |
| 1073 done_ = true; | 1085 done_ = true; |
| 1074 } | 1086 } |
| OLD | NEW |