OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/browser/autocomplete/autocomplete_input.h" | 5 #include "chrome/browser/autocomplete/autocomplete_input.h" |
6 | 6 |
7 #include "base/strings/string_util.h" | 7 #include "base/strings/string_util.h" |
8 #include "base/strings/utf_string_conversions.h" | 8 #include "base/strings/utf_string_conversions.h" |
9 #include "chrome/browser/external_protocol/external_protocol_handler.h" | 9 #include "chrome/browser/external_protocol/external_protocol_handler.h" |
10 #include "chrome/browser/profiles/profile_io_data.h" | 10 #include "chrome/browser/profiles/profile_io_data.h" |
(...skipping 131 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
142 // use the URLFixerUpper here because we want to be smart about what we | 142 // use the URLFixerUpper here because we want to be smart about what we |
143 // consider a scheme. For example, we shouldn't consider www.google.com:80 | 143 // consider a scheme. For example, we shouldn't consider www.google.com:80 |
144 // to have a scheme. | 144 // to have a scheme. |
145 url_parse::Parsed local_parts; | 145 url_parse::Parsed local_parts; |
146 if (!parts) | 146 if (!parts) |
147 parts = &local_parts; | 147 parts = &local_parts; |
148 const base::string16 parsed_scheme(URLFixerUpper::SegmentURL(text, parts)); | 148 const base::string16 parsed_scheme(URLFixerUpper::SegmentURL(text, parts)); |
149 if (scheme) | 149 if (scheme) |
150 *scheme = parsed_scheme; | 150 *scheme = parsed_scheme; |
151 if (canonicalized_url) { | 151 if (canonicalized_url) { |
152 *canonicalized_url = URLFixerUpper::FixupURL(UTF16ToUTF8(text), | 152 *canonicalized_url = URLFixerUpper::FixupURL( |
153 UTF16ToUTF8(desired_tld)); | 153 base::UTF16ToUTF8(text), base::UTF16ToUTF8(desired_tld)); |
154 } | 154 } |
155 | 155 |
156 if (LowerCaseEqualsASCII(parsed_scheme, chrome::kFileScheme)) { | 156 if (LowerCaseEqualsASCII(parsed_scheme, chrome::kFileScheme)) { |
157 // A user might or might not type a scheme when entering a file URL. In | 157 // A user might or might not type a scheme when entering a file URL. In |
158 // either case, |parsed_scheme| will tell us that this is a file URL, but | 158 // either case, |parsed_scheme| will tell us that this is a file URL, but |
159 // |parts->scheme| might be empty, e.g. if the user typed "C:\foo". | 159 // |parts->scheme| might be empty, e.g. if the user typed "C:\foo". |
160 return URL; | 160 return URL; |
161 } | 161 } |
162 | 162 |
163 if (LowerCaseEqualsASCII(parsed_scheme, content::kFileSystemScheme)) { | 163 if (LowerCaseEqualsASCII(parsed_scheme, content::kFileSystemScheme)) { |
(...skipping 26 matching lines...) Expand all Loading... |
190 LowerCaseEqualsASCII(parsed_scheme, chrome::kDataScheme)) | 190 LowerCaseEqualsASCII(parsed_scheme, chrome::kDataScheme)) |
191 return URL; | 191 return URL; |
192 | 192 |
193 // Finally, check and see if the user has explicitly opened this scheme as | 193 // Finally, check and see if the user has explicitly opened this scheme as |
194 // a URL before, or if the "scheme" is actually a username. We need to do | 194 // a URL before, or if the "scheme" is actually a username. We need to do |
195 // this last because some schemes (e.g. "javascript") may be treated as | 195 // this last because some schemes (e.g. "javascript") may be treated as |
196 // "blocked" by the external protocol handler because we don't want pages to | 196 // "blocked" by the external protocol handler because we don't want pages to |
197 // open them, but users still can. | 197 // open them, but users still can. |
198 // TODO(viettrungluu): get rid of conversion. | 198 // TODO(viettrungluu): get rid of conversion. |
199 ExternalProtocolHandler::BlockState block_state = | 199 ExternalProtocolHandler::BlockState block_state = |
200 ExternalProtocolHandler::GetBlockState(UTF16ToUTF8(parsed_scheme)); | 200 ExternalProtocolHandler::GetBlockState( |
| 201 base::UTF16ToUTF8(parsed_scheme)); |
201 switch (block_state) { | 202 switch (block_state) { |
202 case ExternalProtocolHandler::DONT_BLOCK: | 203 case ExternalProtocolHandler::DONT_BLOCK: |
203 return URL; | 204 return URL; |
204 | 205 |
205 case ExternalProtocolHandler::BLOCK: | 206 case ExternalProtocolHandler::BLOCK: |
206 // If we don't want the user to open the URL, don't let it be navigated | 207 // If we don't want the user to open the URL, don't let it be navigated |
207 // to at all. | 208 // to at all. |
208 return QUERY; | 209 return QUERY; |
209 | 210 |
210 default: { | 211 default: { |
211 // We don't know about this scheme. It might be that the user typed a | 212 // We don't know about this scheme. It might be that the user typed a |
212 // URL of the form "username:password@foo.com". | 213 // URL of the form "username:password@foo.com". |
213 const base::string16 http_scheme_prefix = | 214 const base::string16 http_scheme_prefix = |
214 ASCIIToUTF16(std::string(content::kHttpScheme) + | 215 base::ASCIIToUTF16(std::string(content::kHttpScheme) + |
215 content::kStandardSchemeSeparator); | 216 content::kStandardSchemeSeparator); |
216 url_parse::Parsed http_parts; | 217 url_parse::Parsed http_parts; |
217 base::string16 http_scheme; | 218 base::string16 http_scheme; |
218 GURL http_canonicalized_url; | 219 GURL http_canonicalized_url; |
219 Type http_type = Parse(http_scheme_prefix + text, desired_tld, | 220 Type http_type = Parse(http_scheme_prefix + text, desired_tld, |
220 &http_parts, &http_scheme, | 221 &http_parts, &http_scheme, |
221 &http_canonicalized_url); | 222 &http_canonicalized_url); |
222 DCHECK_EQ(std::string(content::kHttpScheme), UTF16ToUTF8(http_scheme)); | 223 DCHECK_EQ(std::string(content::kHttpScheme), |
| 224 base::UTF16ToUTF8(http_scheme)); |
223 | 225 |
224 if (http_type == URL && | 226 if (http_type == URL && |
225 http_parts.username.is_nonempty() && | 227 http_parts.username.is_nonempty() && |
226 http_parts.password.is_nonempty()) { | 228 http_parts.password.is_nonempty()) { |
227 // Manually re-jigger the parsed parts to match |text| (without the | 229 // Manually re-jigger the parsed parts to match |text| (without the |
228 // http scheme added). | 230 // http scheme added). |
229 http_parts.scheme.reset(); | 231 http_parts.scheme.reset(); |
230 url_parse::Component* components[] = { | 232 url_parse::Component* components[] = { |
231 &http_parts.username, | 233 &http_parts.username, |
232 &http_parts.password, | 234 &http_parts.password, |
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
267 | 269 |
268 // If we have an empty host it can't be a URL. | 270 // If we have an empty host it can't be a URL. |
269 if (!parts->host.is_nonempty()) | 271 if (!parts->host.is_nonempty()) |
270 return QUERY; | 272 return QUERY; |
271 | 273 |
272 // Likewise, the RCDS can reject certain obviously-invalid hosts. (We also | 274 // Likewise, the RCDS can reject certain obviously-invalid hosts. (We also |
273 // use the registry length later below.) | 275 // use the registry length later below.) |
274 const base::string16 host(text.substr(parts->host.begin, parts->host.len)); | 276 const base::string16 host(text.substr(parts->host.begin, parts->host.len)); |
275 const size_t registry_length = | 277 const size_t registry_length = |
276 net::registry_controlled_domains::GetRegistryLength( | 278 net::registry_controlled_domains::GetRegistryLength( |
277 UTF16ToUTF8(host), | 279 base::UTF16ToUTF8(host), |
278 net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES, | 280 net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES, |
279 net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES); | 281 net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES); |
280 if (registry_length == std::string::npos) { | 282 if (registry_length == std::string::npos) { |
281 // Try to append the desired_tld. | 283 // Try to append the desired_tld. |
282 if (!desired_tld.empty()) { | 284 if (!desired_tld.empty()) { |
283 base::string16 host_with_tld(host); | 285 base::string16 host_with_tld(host); |
284 if (host[host.length() - 1] != '.') | 286 if (host[host.length() - 1] != '.') |
285 host_with_tld += '.'; | 287 host_with_tld += '.'; |
286 host_with_tld += desired_tld; | 288 host_with_tld += desired_tld; |
287 const size_t tld_length = | 289 const size_t tld_length = |
288 net::registry_controlled_domains::GetRegistryLength( | 290 net::registry_controlled_domains::GetRegistryLength( |
289 UTF16ToUTF8(host_with_tld), | 291 base::UTF16ToUTF8(host_with_tld), |
290 net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES, | 292 net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES, |
291 net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES); | 293 net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES); |
292 if (tld_length != std::string::npos) | 294 if (tld_length != std::string::npos) |
293 return URL; // Something like "99999999999" that looks like a bad IP | 295 return URL; // Something like "99999999999" that looks like a bad IP |
294 // address, but becomes valid on attaching a TLD. | 296 // address, but becomes valid on attaching a TLD. |
295 } | 297 } |
296 return QUERY; // Could be a broken IP address, etc. | 298 return QUERY; // Could be a broken IP address, etc. |
297 } | 299 } |
298 | 300 |
299 | 301 |
300 // See if the hostname is valid. While IE and GURL allow hostnames to contain | 302 // See if the hostname is valid. While IE and GURL allow hostnames to contain |
301 // many other characters (perhaps for weird intranet machines), it's extremely | 303 // many other characters (perhaps for weird intranet machines), it's extremely |
302 // unlikely that a user would be trying to type those in for anything other | 304 // unlikely that a user would be trying to type those in for anything other |
303 // than a search query. | 305 // than a search query. |
304 url_canon::CanonHostInfo host_info; | 306 url_canon::CanonHostInfo host_info; |
305 const std::string canonicalized_host(net::CanonicalizeHost(UTF16ToUTF8(host), | 307 const std::string canonicalized_host(net::CanonicalizeHost( |
306 &host_info)); | 308 base::UTF16ToUTF8(host), &host_info)); |
307 if ((host_info.family == url_canon::CanonHostInfo::NEUTRAL) && | 309 if ((host_info.family == url_canon::CanonHostInfo::NEUTRAL) && |
308 !net::IsCanonicalizedHostCompliant(canonicalized_host, | 310 !net::IsCanonicalizedHostCompliant(canonicalized_host, |
309 UTF16ToUTF8(desired_tld))) { | 311 base::UTF16ToUTF8(desired_tld))) { |
310 // Invalid hostname. There are several possible cases: | 312 // Invalid hostname. There are several possible cases: |
311 // * Our checker is too strict and the user pasted in a real-world URL | 313 // * Our checker is too strict and the user pasted in a real-world URL |
312 // that's "invalid" but resolves. To catch these, we return UNKNOWN when | 314 // that's "invalid" but resolves. To catch these, we return UNKNOWN when |
313 // the user explicitly typed a scheme, so we'll still search by default | 315 // the user explicitly typed a scheme, so we'll still search by default |
314 // but we'll show the accidental search infobar if necessary. | 316 // but we'll show the accidental search infobar if necessary. |
315 // * The user is typing a multi-word query. If we see a space anywhere in | 317 // * The user is typing a multi-word query. If we see a space anywhere in |
316 // the hostname we assume this is a search and return QUERY. | 318 // the hostname we assume this is a search and return QUERY. |
317 // * Our checker is too strict and the user is typing a real-world hostname | 319 // * Our checker is too strict and the user is typing a real-world hostname |
318 // that's "invalid" but resolves. We return UNKNOWN if the TLD is known. | 320 // that's "invalid" but resolves. We return UNKNOWN if the TLD is known. |
319 // Note that we explicitly excluded hosts with spaces above so that | 321 // Note that we explicitly excluded hosts with spaces above so that |
(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
386 // If the host has a known TLD or a port, it's probably a URL, with the | 388 // If the host has a known TLD or a port, it's probably a URL, with the |
387 // following exceptions: | 389 // following exceptions: |
388 // * Any "IP addresses" that make it here are more likely searches | 390 // * Any "IP addresses" that make it here are more likely searches |
389 // (see above). | 391 // (see above). |
390 // * If we reach here with a username, our input looks like "user@host[.tld]". | 392 // * If we reach here with a username, our input looks like "user@host[.tld]". |
391 // Because there is no scheme explicitly specified, we think this is more | 393 // Because there is no scheme explicitly specified, we think this is more |
392 // likely an email address than an HTTP auth attempt. Hence, we search by | 394 // likely an email address than an HTTP auth attempt. Hence, we search by |
393 // default and let users correct us on a case-by-case basis. | 395 // default and let users correct us on a case-by-case basis. |
394 // Note that we special-case "localhost" as a known hostname. | 396 // Note that we special-case "localhost" as a known hostname. |
395 if ((host_info.family != url_canon::CanonHostInfo::IPV4) && | 397 if ((host_info.family != url_canon::CanonHostInfo::IPV4) && |
396 ((registry_length != 0) || (host == ASCIIToUTF16("localhost") || | 398 ((registry_length != 0) || (host == base::ASCIIToUTF16("localhost") || |
397 parts->port.is_nonempty()))) | 399 parts->port.is_nonempty()))) |
398 return parts->username.is_nonempty() ? UNKNOWN : URL; | 400 return parts->username.is_nonempty() ? UNKNOWN : URL; |
399 | 401 |
400 // If we reach this point, we know there's no known TLD on the input, so if | 402 // If we reach this point, we know there's no known TLD on the input, so if |
401 // the user wishes to add a desired_tld, the fixup code will oblige; thus this | 403 // the user wishes to add a desired_tld, the fixup code will oblige; thus this |
402 // is a URL. | 404 // is a URL. |
403 if (!desired_tld.empty()) | 405 if (!desired_tld.empty()) |
404 return URL; | 406 return URL; |
405 | 407 |
406 // No scheme, password, port, path, and no known TLD on the host. | 408 // No scheme, password, port, path, and no known TLD on the host. |
(...skipping 88 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
495 ++num_nonhost_components; | 497 ++num_nonhost_components; |
496 if (parts.query.is_nonempty()) | 498 if (parts.query.is_nonempty()) |
497 ++num_nonhost_components; | 499 ++num_nonhost_components; |
498 if (parts.ref.is_nonempty()) | 500 if (parts.ref.is_nonempty()) |
499 ++num_nonhost_components; | 501 ++num_nonhost_components; |
500 return num_nonhost_components; | 502 return num_nonhost_components; |
501 } | 503 } |
502 | 504 |
503 // static | 505 // static |
// Reports whether |input| uses the http scheme. The text is first converted
// to UTF-8; if it starts with the view-source scheme, that prefix is removed
// so the http check is applied to whatever follows it.
// (Review rows: left column is the old revision, right column the new one.
// The only change in this function is the base:: qualifier on UTF16ToUTF8.)
504 bool AutocompleteInput::HasHTTPScheme(const base::string16& input) { | 506 bool AutocompleteInput::HasHTTPScheme(const base::string16& input) {
505 std::string utf8_input(UTF16ToUTF8(input)); | 507 std::string utf8_input(base::UTF16ToUTF8(input));
506 url_parse::Component scheme; | 508 url_parse::Component scheme;
// On a view-source match, |scheme| is presumably filled with the position of
// the scheme inside |utf8_input| (it is read below) -- confirm in url_util.
507 if (url_util::FindAndCompareScheme(utf8_input, content::kViewSourceScheme, | 509 if (url_util::FindAndCompareScheme(utf8_input, content::kViewSourceScheme,
508 &scheme)) | 510 &scheme))
// Erase through one character past the end of the scheme; that extra
// character is presumably the ':' separator -- TODO confirm.
509 utf8_input.erase(0, scheme.end() + 1); | 511 utf8_input.erase(0, scheme.end() + 1);
// NULL is passed for the output component: only the boolean result is used.
510 return url_util::FindAndCompareScheme(utf8_input, content::kHttpScheme, NULL); | 512 return url_util::FindAndCompareScheme(utf8_input, content::kHttpScheme, NULL);
511 } | 513 } |
512 | 514 |
513 void AutocompleteInput::UpdateText(const base::string16& text, | 515 void AutocompleteInput::UpdateText(const base::string16& text, |
514 size_t cursor_position, | 516 size_t cursor_position, |
515 const url_parse::Parsed& parts) { | 517 const url_parse::Parsed& parts) { |
(...skipping 12 matching lines...) Expand all Loading... |
528 current_page_classification_ = AutocompleteInput::INVALID_SPEC; | 530 current_page_classification_ = AutocompleteInput::INVALID_SPEC; |
529 type_ = INVALID; | 531 type_ = INVALID; |
530 parts_ = url_parse::Parsed(); | 532 parts_ = url_parse::Parsed(); |
531 scheme_.clear(); | 533 scheme_.clear(); |
532 canonicalized_url_ = GURL(); | 534 canonicalized_url_ = GURL(); |
533 prevent_inline_autocomplete_ = false; | 535 prevent_inline_autocomplete_ = false; |
534 prefer_keyword_ = false; | 536 prefer_keyword_ = false; |
535 allow_exact_keyword_match_ = false; | 537 allow_exact_keyword_match_ = false; |
536 matches_requested_ = ALL_MATCHES; | 538 matches_requested_ = ALL_MATCHES; |
537 } | 539 } |
OLD | NEW |