OLD | NEW |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/browser/autocomplete/autocomplete.h" | 5 #include "chrome/browser/autocomplete/autocomplete.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 | 8 |
9 #include "base/basictypes.h" | 9 #include "base/basictypes.h" |
10 #include "base/command_line.h" | 10 #include "base/command_line.h" |
(...skipping 306 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
317 ((registry_length != 0) && (host.find(' ') == string16::npos))) ? | 317 ((registry_length != 0) && (host.find(' ') == string16::npos))) ? |
318 UNKNOWN : QUERY; | 318 UNKNOWN : QUERY; |
319 } | 319 } |
320 | 320 |
321 // A port number is a good indicator that this is a URL. However, it might | 321 // A port number is a good indicator that this is a URL. However, it might |
322 // also be a query like "1.66:1" that looks kind of like an IP address and | 322 // also be a query like "1.66:1" that looks kind of like an IP address and |
323 // port number. So here we only check for "port numbers" that are illegal and | 323 // port number. So here we only check for "port numbers" that are illegal and |
324 // thus mean this can't be navigated to (e.g. "1.2.3.4:garbage"), and we save | 324 // thus mean this can't be navigated to (e.g. "1.2.3.4:garbage"), and we save |
325 // handling legal port numbers until after the "IP address" determination | 325 // handling legal port numbers until after the "IP address" determination |
326 // below. | 326 // below. |
327 if (parts->port.is_nonempty()) { | 327 if (url_parse::ParsePort(text.c_str(), parts->port) == |
328 int port; | 328 url_parse::PORT_INVALID) |
329 if (!base::StringToInt(text.substr(parts->port.begin, parts->port.len), | 329 return QUERY; |
330 &port) || | |
331 (port < 0) || (port > 65535)) | |
332 return QUERY; | |
333 } | |
334 | 330 |
335 // Now that we've ruled out all schemes other than http or https and done a | 331 // Now that we've ruled out all schemes other than http or https and done a |
336 // little more sanity checking, the presence of a scheme means this is likely | 332 // little more sanity checking, the presence of a scheme means this is likely |
337 // a URL. | 333 // a URL. |
338 if (parts->scheme.is_nonempty()) | 334 if (parts->scheme.is_nonempty()) |
339 return URL; | 335 return URL; |
340 | 336 |
341 // See if the host is an IP address. | 337 // See if the host is an IP address. |
342 if (host_info.family == url_canon::CanonHostInfo::IPV4) { | |
343 // If the user originally typed a host that looks like an IP address (a | |
344 // dotted quad), they probably want to open it. If the original input was | |
345 // something else (like a single number), they probably wanted to search for | |
346 // it, unless they explicitly typed a scheme. This is true even if the URL | |
347 // appears to have a path: "1.2/45" is more likely a search (for the answer | |
348 // to a math problem) than a URL. | |
349 if (host_info.num_ipv4_components == 4) | |
350 return URL; | |
351 return desired_tld.empty() ? UNKNOWN : REQUESTED_URL; | |
352 } | |
353 if (host_info.family == url_canon::CanonHostInfo::IPV6) | 338 if (host_info.family == url_canon::CanonHostInfo::IPV6) |
354 return URL; | 339 return URL; |
| 340 // If the user originally typed a host that looks like an IP address (a |
| 341 // dotted quad), they probably want to open it. If the original input was |
| 342 // something else (like a single number), they probably wanted to search for |
| 343 // it, unless they explicitly typed a scheme. This is true even if the URL |
| 344 // appears to have a path: "1.2/45" is more likely a search (for the answer |
| 345 // to a math problem) than a URL. However, if there are more non-host |
| 346 // components, then maybe this really was intended to be a navigation. For |
| 347 // this reason we only check the dotted-quad case here, and save the "other |
| 348 // IP addresses" case for after we check the number of non-host components |
| 349 // below. |
| 350 if ((host_info.family == url_canon::CanonHostInfo::IPV4) && |
| 351 (host_info.num_ipv4_components == 4)) |
| 352 return URL; |
355 | 353 |
356 // Presence of a password means this is likely a URL. Note that unless the | 354 // Presence of a password means this is likely a URL. Note that unless the |
357 // user has typed an explicit "http://" or similar, we'll probably think that | 355 // user has typed an explicit "http://" or similar, we'll probably think that |
358 // the username is some unknown scheme, and bail out in the scheme-handling | 356 // the username is some unknown scheme, and bail out in the scheme-handling |
359 // code above. | 357 // code above. |
360 if (parts->password.is_nonempty()) | 358 if (parts->password.is_nonempty()) |
361 return URL; | 359 return URL; |
362 | 360 |
363 // Trailing slashes force the input to be treated as a URL. | 361 // Trailing slashes force the input to be treated as a URL. |
364 if (parts->path.len == 1) | 362 if (parts->path.len == 1) |
365 return URL; | 363 return URL; |
366 | 364 |
367 // If we reach here with a username, but no port or path, our input looks like | 365 // If there is more than one recognized non-host component, this is likely to |
368 // "user@host". Because there is no scheme explicitly specified, we think | 366 // be a URL, even if the TLD is unknown (in which case this is likely an |
369 // this is more likely an email address than an HTTP auth attempt. Hence, we | 367 // intranet URL). |
370 // search by default and let users correct us on a case-by-case basis. | 368 if (NumNonHostComponents(*parts) > 1) |
371 if (parts->username.is_nonempty() && !parts->port.is_nonempty() && | 369 return URL; |
372 !parts->path.is_nonempty()) | |
373 return UNKNOWN; | |
374 | 370 |
375 // If the host has a known TLD, it's probably a URL. Also special-case | 371 // If the host has a known TLD, it's probably a URL, with the following |
376 // "localhost" as a known hostname. | 372 // exceptions: |
377 if ((registry_length != 0) || (host == ASCIIToUTF16("localhost"))) | 373 // * Any "IP addresses" that make it here are more likely searches |
378 return URL; | 374 // (see above). |
| 375 // * If we reach here with a username, our input looks like "user@host[.tld]". |
| 376 // Because there is no scheme explicitly specified, we think this is more |
| 377 // likely an email address than an HTTP auth attempt. Hence, we search by |
| 378 // default and let users correct us on a case-by-case basis. |
| 379 // Note that we special-case "localhost" as a known hostname. |
| 380 if ((host_info.family != url_canon::CanonHostInfo::IPV4) && |
| 381 ((registry_length != 0) || (host == ASCIIToUTF16("localhost")))) |
| 382 return parts->username.is_nonempty() ? UNKNOWN : URL; |
379 | 383 |
380 // If we reach this point, we know there's no known TLD on the input, so if | 384 // If we reach this point, we know there's no known TLD on the input, so if |
381 // the user wishes to add a desired_tld, the fixup code will oblige; thus this | 385 // the user wishes to add a desired_tld, the fixup code will oblige; thus this |
382 // is a URL. | 386 // is a URL. |
383 if (!desired_tld.empty()) | 387 if (!desired_tld.empty()) |
384 return REQUESTED_URL; | 388 return REQUESTED_URL; |
385 | 389 |
386 // No scheme, username, password, port, path, and no known TLD on the host. | 390 // No scheme, password, port, path, and no known TLD on the host. |
387 // This could be: | 391 // This could be: |
| 392 // * An "incomplete IP address"; likely a search (see above). |
| 393 // * An email-like input like "user@host", where "host" has no known TLD. |
| 394 // It's not clear what the user means here and searching seems reasonable. |
388 // * A single word "foo"; possibly an intranet site, but more likely a search. | 395 // * A single word "foo"; possibly an intranet site, but more likely a search. |
389 // This is ideally an UNKNOWN, and we can let the Alternate Nav URL code | 396 // This is ideally an UNKNOWN, and we can let the Alternate Nav URL code |
390 // catch our mistakes. | 397 // catch our mistakes. |
391 // * A URL with a valid TLD we don't know about yet. If e.g. a registrar adds | 398 // * A URL with a valid TLD we don't know about yet. If e.g. a registrar adds |
392 // "xxx" as a TLD, then until we add it to our data file, Chrome won't know | 399 // "xxx" as a TLD, then until we add it to our data file, Chrome won't know |
393 // "foo.xxx" is a real URL. So ideally this is a URL, but we can't really | 400 // "foo.xxx" is a real URL. So ideally this is a URL, but we can't really |
394 // distinguish this case from: | 401 // distinguish this case from: |
395 // * A "URL-like" string that's not really a URL (like | 402 // * A "URL-like" string that's not really a URL (like |
396 // "browser.tabs.closeButtons" or "java.awt.event.*"). This is ideally a | 403 // "browser.tabs.closeButtons" or "java.awt.event.*"). This is ideally a |
397 // QUERY. Since this is indistinguishable from the case above, and this | 404 // QUERY. Since this is indistinguishable from the case above, and this |
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
449 if (!net::CanStripTrailingSlash(url)) | 456 if (!net::CanStripTrailingSlash(url)) |
450 return formatted_url; | 457 return formatted_url; |
451 const string16 url_with_path(formatted_url + char16('/')); | 458 const string16 url_with_path(formatted_url + char16('/')); |
452 return (AutocompleteInput::Parse(formatted_url, string16(), NULL, NULL, | 459 return (AutocompleteInput::Parse(formatted_url, string16(), NULL, NULL, |
453 NULL) == | 460 NULL) == |
454 AutocompleteInput::Parse(url_with_path, string16(), NULL, NULL, | 461 AutocompleteInput::Parse(url_with_path, string16(), NULL, NULL, |
455 NULL)) ? | 462 NULL)) ? |
456 formatted_url : url_with_path; | 463 formatted_url : url_with_path; |
457 } | 464 } |
458 | 465 |
| 466 // static |
| 467 int AutocompleteInput::NumNonHostComponents(const url_parse::Parsed& parts) { |
| 468 int num_nonhost_components = 0; |
| 469 if (parts.scheme.is_nonempty()) |
| 470 ++num_nonhost_components; |
| 471 if (parts.username.is_nonempty()) |
| 472 ++num_nonhost_components; |
| 473 if (parts.password.is_nonempty()) |
| 474 ++num_nonhost_components; |
| 475 if (parts.port.is_nonempty()) |
| 476 ++num_nonhost_components; |
| 477 if (parts.path.is_nonempty()) |
| 478 ++num_nonhost_components; |
| 479 if (parts.query.is_nonempty()) |
| 480 ++num_nonhost_components; |
| 481 if (parts.ref.is_nonempty()) |
| 482 ++num_nonhost_components; |
| 483 return num_nonhost_components; |
| 484 } |
| 485 |
459 void AutocompleteInput::UpdateText(const string16& text, | 486 void AutocompleteInput::UpdateText(const string16& text, |
460 const url_parse::Parsed& parts) { | 487 const url_parse::Parsed& parts) { |
461 text_ = text; | 488 text_ = text; |
462 parts_ = parts; | 489 parts_ = parts; |
463 } | 490 } |
464 | 491 |
465 bool AutocompleteInput::Equals(const AutocompleteInput& other) const { | 492 bool AutocompleteInput::Equals(const AutocompleteInput& other) const { |
466 return (text_ == other.text_) && | 493 return (text_ == other.text_) && |
467 (type_ == other.type_) && | 494 (type_ == other.type_) && |
468 (desired_tld_ == other.desired_tld_) && | 495 (desired_tld_ == other.desired_tld_) && |
(...skipping 545 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1014 } | 1041 } |
1015 done_ = true; | 1042 done_ = true; |
1016 } | 1043 } |
1017 | 1044 |
1018 void AutocompleteController::StartExpireTimer() { | 1045 void AutocompleteController::StartExpireTimer() { |
1019 if (result_.HasCopiedMatches()) | 1046 if (result_.HasCopiedMatches()) |
1020 expire_timer_.Start(FROM_HERE, | 1047 expire_timer_.Start(FROM_HERE, |
1021 base::TimeDelta::FromMilliseconds(kExpireTimeMS), | 1048 base::TimeDelta::FromMilliseconds(kExpireTimeMS), |
1022 this, &AutocompleteController::ExpireCopiedEntries); | 1049 this, &AutocompleteController::ExpireCopiedEntries); |
1023 } | 1050 } |
OLD | NEW |