| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "chrome/browser/autocomplete/autocomplete_input.h" | 5 #include "chrome/browser/autocomplete/autocomplete_input.h" |
| 6 | 6 |
| 7 #include "base/strings/string_util.h" | 7 #include "base/strings/string_util.h" |
| 8 #include "base/strings/utf_string_conversions.h" | 8 #include "base/strings/utf_string_conversions.h" |
| 9 #include "chrome/browser/external_protocol/external_protocol_handler.h" | 9 #include "chrome/browser/external_protocol/external_protocol_handler.h" |
| 10 #include "chrome/browser/profiles/profile_io_data.h" | 10 #include "chrome/browser/profiles/profile_io_data.h" |
| (...skipping 131 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 142 // use the URLFixerUpper here because we want to be smart about what we | 142 // use the URLFixerUpper here because we want to be smart about what we |
| 143 // consider a scheme. For example, we shouldn't consider www.google.com:80 | 143 // consider a scheme. For example, we shouldn't consider www.google.com:80 |
| 144 // to have a scheme. | 144 // to have a scheme. |
| 145 url_parse::Parsed local_parts; | 145 url_parse::Parsed local_parts; |
| 146 if (!parts) | 146 if (!parts) |
| 147 parts = &local_parts; | 147 parts = &local_parts; |
| 148 const base::string16 parsed_scheme(URLFixerUpper::SegmentURL(text, parts)); | 148 const base::string16 parsed_scheme(URLFixerUpper::SegmentURL(text, parts)); |
| 149 if (scheme) | 149 if (scheme) |
| 150 *scheme = parsed_scheme; | 150 *scheme = parsed_scheme; |
| 151 if (canonicalized_url) { | 151 if (canonicalized_url) { |
| 152 *canonicalized_url = URLFixerUpper::FixupURL(UTF16ToUTF8(text), | 152 *canonicalized_url = URLFixerUpper::FixupURL( |
| 153 UTF16ToUTF8(desired_tld)); | 153 base::UTF16ToUTF8(text), base::UTF16ToUTF8(desired_tld)); |
| 154 } | 154 } |
| 155 | 155 |
| 156 if (LowerCaseEqualsASCII(parsed_scheme, chrome::kFileScheme)) { | 156 if (LowerCaseEqualsASCII(parsed_scheme, chrome::kFileScheme)) { |
| 157 // A user might or might not type a scheme when entering a file URL. In | 157 // A user might or might not type a scheme when entering a file URL. In |
| 158 // either case, |parsed_scheme| will tell us that this is a file URL, but | 158 // either case, |parsed_scheme| will tell us that this is a file URL, but |
| 159 // |parts->scheme| might be empty, e.g. if the user typed "C:\foo". | 159 // |parts->scheme| might be empty, e.g. if the user typed "C:\foo". |
| 160 return URL; | 160 return URL; |
| 161 } | 161 } |
| 162 | 162 |
| 163 if (LowerCaseEqualsASCII(parsed_scheme, content::kFileSystemScheme)) { | 163 if (LowerCaseEqualsASCII(parsed_scheme, content::kFileSystemScheme)) { |
| (...skipping 26 matching lines...) Expand all Loading... |
| 190 LowerCaseEqualsASCII(parsed_scheme, chrome::kDataScheme)) | 190 LowerCaseEqualsASCII(parsed_scheme, chrome::kDataScheme)) |
| 191 return URL; | 191 return URL; |
| 192 | 192 |
| 193 // Finally, check and see if the user has explicitly opened this scheme as | 193 // Finally, check and see if the user has explicitly opened this scheme as |
| 194 // a URL before, or if the "scheme" is actually a username. We need to do | 194 // a URL before, or if the "scheme" is actually a username. We need to do |
| 195 // this last because some schemes (e.g. "javascript") may be treated as | 195 // this last because some schemes (e.g. "javascript") may be treated as |
| 196 // "blocked" by the external protocol handler because we don't want pages to | 196 // "blocked" by the external protocol handler because we don't want pages to |
| 197 // open them, but users still can. | 197 // open them, but users still can. |
| 198 // TODO(viettrungluu): get rid of conversion. | 198 // TODO(viettrungluu): get rid of conversion. |
| 199 ExternalProtocolHandler::BlockState block_state = | 199 ExternalProtocolHandler::BlockState block_state = |
| 200 ExternalProtocolHandler::GetBlockState(UTF16ToUTF8(parsed_scheme)); | 200 ExternalProtocolHandler::GetBlockState( |
| 201 base::UTF16ToUTF8(parsed_scheme)); |
| 201 switch (block_state) { | 202 switch (block_state) { |
| 202 case ExternalProtocolHandler::DONT_BLOCK: | 203 case ExternalProtocolHandler::DONT_BLOCK: |
| 203 return URL; | 204 return URL; |
| 204 | 205 |
| 205 case ExternalProtocolHandler::BLOCK: | 206 case ExternalProtocolHandler::BLOCK: |
| 206 // If we don't want the user to open the URL, don't let it be navigated | 207 // If we don't want the user to open the URL, don't let it be navigated |
| 207 // to at all. | 208 // to at all. |
| 208 return QUERY; | 209 return QUERY; |
| 209 | 210 |
| 210 default: { | 211 default: { |
| 211 // We don't know about this scheme. It might be that the user typed a | 212 // We don't know about this scheme. It might be that the user typed a |
| 212 // URL of the form "username:password@foo.com". | 213 // URL of the form "username:password@foo.com". |
| 213 const base::string16 http_scheme_prefix = | 214 const base::string16 http_scheme_prefix = |
| 214 ASCIIToUTF16(std::string(content::kHttpScheme) + | 215 base::ASCIIToUTF16(std::string(content::kHttpScheme) + |
| 215 content::kStandardSchemeSeparator); | 216 content::kStandardSchemeSeparator); |
| 216 url_parse::Parsed http_parts; | 217 url_parse::Parsed http_parts; |
| 217 base::string16 http_scheme; | 218 base::string16 http_scheme; |
| 218 GURL http_canonicalized_url; | 219 GURL http_canonicalized_url; |
| 219 Type http_type = Parse(http_scheme_prefix + text, desired_tld, | 220 Type http_type = Parse(http_scheme_prefix + text, desired_tld, |
| 220 &http_parts, &http_scheme, | 221 &http_parts, &http_scheme, |
| 221 &http_canonicalized_url); | 222 &http_canonicalized_url); |
| 222 DCHECK_EQ(std::string(content::kHttpScheme), UTF16ToUTF8(http_scheme)); | 223 DCHECK_EQ(std::string(content::kHttpScheme), |
| 224 base::UTF16ToUTF8(http_scheme)); |
| 223 | 225 |
| 224 if (http_type == URL && | 226 if (http_type == URL && |
| 225 http_parts.username.is_nonempty() && | 227 http_parts.username.is_nonempty() && |
| 226 http_parts.password.is_nonempty()) { | 228 http_parts.password.is_nonempty()) { |
| 227 // Manually re-jigger the parsed parts to match |text| (without the | 229 // Manually re-jigger the parsed parts to match |text| (without the |
| 228 // http scheme added). | 230 // http scheme added). |
| 229 http_parts.scheme.reset(); | 231 http_parts.scheme.reset(); |
| 230 url_parse::Component* components[] = { | 232 url_parse::Component* components[] = { |
| 231 &http_parts.username, | 233 &http_parts.username, |
| 232 &http_parts.password, | 234 &http_parts.password, |
| (...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 267 | 269 |
| 268 // If we have an empty host it can't be a URL. | 270 // If we have an empty host it can't be a URL. |
| 269 if (!parts->host.is_nonempty()) | 271 if (!parts->host.is_nonempty()) |
| 270 return QUERY; | 272 return QUERY; |
| 271 | 273 |
| 272 // Likewise, the RCDS can reject certain obviously-invalid hosts. (We also | 274 // Likewise, the RCDS can reject certain obviously-invalid hosts. (We also |
| 273 // use the registry length later below.) | 275 // use the registry length later below.) |
| 274 const base::string16 host(text.substr(parts->host.begin, parts->host.len)); | 276 const base::string16 host(text.substr(parts->host.begin, parts->host.len)); |
| 275 const size_t registry_length = | 277 const size_t registry_length = |
| 276 net::registry_controlled_domains::GetRegistryLength( | 278 net::registry_controlled_domains::GetRegistryLength( |
| 277 UTF16ToUTF8(host), | 279 base::UTF16ToUTF8(host), |
| 278 net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES, | 280 net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES, |
| 279 net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES); | 281 net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES); |
| 280 if (registry_length == std::string::npos) { | 282 if (registry_length == std::string::npos) { |
| 281 // Try to append the desired_tld. | 283 // Try to append the desired_tld. |
| 282 if (!desired_tld.empty()) { | 284 if (!desired_tld.empty()) { |
| 283 base::string16 host_with_tld(host); | 285 base::string16 host_with_tld(host); |
| 284 if (host[host.length() - 1] != '.') | 286 if (host[host.length() - 1] != '.') |
| 285 host_with_tld += '.'; | 287 host_with_tld += '.'; |
| 286 host_with_tld += desired_tld; | 288 host_with_tld += desired_tld; |
| 287 const size_t tld_length = | 289 const size_t tld_length = |
| 288 net::registry_controlled_domains::GetRegistryLength( | 290 net::registry_controlled_domains::GetRegistryLength( |
| 289 UTF16ToUTF8(host_with_tld), | 291 base::UTF16ToUTF8(host_with_tld), |
| 290 net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES, | 292 net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES, |
| 291 net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES); | 293 net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES); |
| 292 if (tld_length != std::string::npos) | 294 if (tld_length != std::string::npos) |
| 293 return URL; // Something like "99999999999" that looks like a bad IP | 295 return URL; // Something like "99999999999" that looks like a bad IP |
| 294 // address, but becomes valid on attaching a TLD. | 296 // address, but becomes valid on attaching a TLD. |
| 295 } | 297 } |
| 296 return QUERY; // Could be a broken IP address, etc. | 298 return QUERY; // Could be a broken IP address, etc. |
| 297 } | 299 } |
| 298 | 300 |
| 299 | 301 |
| 300 // See if the hostname is valid. While IE and GURL allow hostnames to contain | 302 // See if the hostname is valid. While IE and GURL allow hostnames to contain |
| 301 // many other characters (perhaps for weird intranet machines), it's extremely | 303 // many other characters (perhaps for weird intranet machines), it's extremely |
| 302 // unlikely that a user would be trying to type those in for anything other | 304 // unlikely that a user would be trying to type those in for anything other |
| 303 // than a search query. | 305 // than a search query. |
| 304 url_canon::CanonHostInfo host_info; | 306 url_canon::CanonHostInfo host_info; |
| 305 const std::string canonicalized_host(net::CanonicalizeHost(UTF16ToUTF8(host), | 307 const std::string canonicalized_host(net::CanonicalizeHost( |
| 306 &host_info)); | 308 base::UTF16ToUTF8(host), &host_info)); |
| 307 if ((host_info.family == url_canon::CanonHostInfo::NEUTRAL) && | 309 if ((host_info.family == url_canon::CanonHostInfo::NEUTRAL) && |
| 308 !net::IsCanonicalizedHostCompliant(canonicalized_host, | 310 !net::IsCanonicalizedHostCompliant(canonicalized_host, |
| 309 UTF16ToUTF8(desired_tld))) { | 311 base::UTF16ToUTF8(desired_tld))) { |
| 310 // Invalid hostname. There are several possible cases: | 312 // Invalid hostname. There are several possible cases: |
| 311 // * Our checker is too strict and the user pasted in a real-world URL | 313 // * Our checker is too strict and the user pasted in a real-world URL |
| 312 // that's "invalid" but resolves. To catch these, we return UNKNOWN when | 314 // that's "invalid" but resolves. To catch these, we return UNKNOWN when |
| 313 // the user explicitly typed a scheme, so we'll still search by default | 315 // the user explicitly typed a scheme, so we'll still search by default |
| 314 // but we'll show the accidental search infobar if necessary. | 316 // but we'll show the accidental search infobar if necessary. |
| 315 // * The user is typing a multi-word query. If we see a space anywhere in | 317 // * The user is typing a multi-word query. If we see a space anywhere in |
| 316 // the hostname we assume this is a search and return QUERY. | 318 // the hostname we assume this is a search and return QUERY. |
| 317 // * Our checker is too strict and the user is typing a real-world hostname | 319 // * Our checker is too strict and the user is typing a real-world hostname |
| 318 // that's "invalid" but resolves. We return UNKNOWN if the TLD is known. | 320 // that's "invalid" but resolves. We return UNKNOWN if the TLD is known. |
| 319 // Note that we explicitly excluded hosts with spaces above so that | 321 // Note that we explicitly excluded hosts with spaces above so that |
| (...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 386 // If the host has a known TLD or a port, it's probably a URL, with the | 388 // If the host has a known TLD or a port, it's probably a URL, with the |
| 387 // following exceptions: | 389 // following exceptions: |
| 388 // * Any "IP addresses" that make it here are more likely searches | 390 // * Any "IP addresses" that make it here are more likely searches |
| 389 // (see above). | 391 // (see above). |
| 390 // * If we reach here with a username, our input looks like "user@host[.tld]". | 392 // * If we reach here with a username, our input looks like "user@host[.tld]". |
| 391 // Because there is no scheme explicitly specified, we think this is more | 393 // Because there is no scheme explicitly specified, we think this is more |
| 392 // likely an email address than an HTTP auth attempt. Hence, we search by | 394 // likely an email address than an HTTP auth attempt. Hence, we search by |
| 393 // default and let users correct us on a case-by-case basis. | 395 // default and let users correct us on a case-by-case basis. |
| 394 // Note that we special-case "localhost" as a known hostname. | 396 // Note that we special-case "localhost" as a known hostname. |
| 395 if ((host_info.family != url_canon::CanonHostInfo::IPV4) && | 397 if ((host_info.family != url_canon::CanonHostInfo::IPV4) && |
| 396 ((registry_length != 0) || (host == ASCIIToUTF16("localhost") || | 398 ((registry_length != 0) || (host == base::ASCIIToUTF16("localhost") || |
| 397 parts->port.is_nonempty()))) | 399 parts->port.is_nonempty()))) |
| 398 return parts->username.is_nonempty() ? UNKNOWN : URL; | 400 return parts->username.is_nonempty() ? UNKNOWN : URL; |
| 399 | 401 |
| 400 // If we reach this point, we know there's no known TLD on the input, so if | 402 // If we reach this point, we know there's no known TLD on the input, so if |
| 401 // the user wishes to add a desired_tld, the fixup code will oblige; thus this | 403 // the user wishes to add a desired_tld, the fixup code will oblige; thus this |
| 402 // is a URL. | 404 // is a URL. |
| 403 if (!desired_tld.empty()) | 405 if (!desired_tld.empty()) |
| 404 return URL; | 406 return URL; |
| 405 | 407 |
| 406 // No scheme, password, port, path, and no known TLD on the host. | 408 // No scheme, password, port, path, and no known TLD on the host. |
| (...skipping 88 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 495 ++num_nonhost_components; | 497 ++num_nonhost_components; |
| 496 if (parts.query.is_nonempty()) | 498 if (parts.query.is_nonempty()) |
| 497 ++num_nonhost_components; | 499 ++num_nonhost_components; |
| 498 if (parts.ref.is_nonempty()) | 500 if (parts.ref.is_nonempty()) |
| 499 ++num_nonhost_components; | 501 ++num_nonhost_components; |
| 500 return num_nonhost_components; | 502 return num_nonhost_components; |
| 501 } | 503 } |
| 502 | 504 |
| 503 // static. Converts input to UTF-8, erases a leading view-source scheme plus the one character after it (presumably the ':' separator) when FindAndCompareScheme matches it, then returns whether the remaining text matches the http scheme. | 505 // static. Converts input to UTF-8, erases a leading view-source scheme plus the one character after it (presumably the ':' separator) when FindAndCompareScheme matches it, then returns whether the remaining text matches the http scheme. |
| 504 bool AutocompleteInput::HasHTTPScheme(const base::string16& input) { | 506 bool AutocompleteInput::HasHTTPScheme(const base::string16& input) { |
| 505 std::string utf8_input(UTF16ToUTF8(input)); | 507 std::string utf8_input(base::UTF16ToUTF8(input)); |
| 506 url_parse::Component scheme; | 508 url_parse::Component scheme; |
| 507 if (url_util::FindAndCompareScheme(utf8_input, content::kViewSourceScheme, | 509 if (url_util::FindAndCompareScheme(utf8_input, content::kViewSourceScheme, |
| 508 &scheme)) | 510 &scheme)) |
| 509 utf8_input.erase(0, scheme.end() + 1); | 511 utf8_input.erase(0, scheme.end() + 1); |
| 510 return url_util::FindAndCompareScheme(utf8_input, content::kHttpScheme, NULL); | 512 return url_util::FindAndCompareScheme(utf8_input, content::kHttpScheme, NULL); |
| 511 } | 513 } |
| 512 | 514 |
| 513 void AutocompleteInput::UpdateText(const base::string16& text, | 515 void AutocompleteInput::UpdateText(const base::string16& text, |
| 514 size_t cursor_position, | 516 size_t cursor_position, |
| 515 const url_parse::Parsed& parts) { | 517 const url_parse::Parsed& parts) { |
| (...skipping 12 matching lines...) Expand all Loading... |
| 528 current_page_classification_ = AutocompleteInput::INVALID_SPEC; | 530 current_page_classification_ = AutocompleteInput::INVALID_SPEC; |
| 529 type_ = INVALID; | 531 type_ = INVALID; |
| 530 parts_ = url_parse::Parsed(); | 532 parts_ = url_parse::Parsed(); |
| 531 scheme_.clear(); | 533 scheme_.clear(); |
| 532 canonicalized_url_ = GURL(); | 534 canonicalized_url_ = GURL(); |
| 533 prevent_inline_autocomplete_ = false; | 535 prevent_inline_autocomplete_ = false; |
| 534 prefer_keyword_ = false; | 536 prefer_keyword_ = false; |
| 535 allow_exact_keyword_match_ = false; | 537 allow_exact_keyword_match_ = false; |
| 536 matches_requested_ = ALL_MATCHES; | 538 matches_requested_ = ALL_MATCHES; |
| 537 } | 539 } |
| OLD | NEW |