Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(99)

Side by Side Diff: chrome/browser/autocomplete/autocomplete.cc

Issue 8258004: Tweak omnibox parsing heuristics more: (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: Created 9 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/browser/autocomplete/autocomplete.h" 5 #include "chrome/browser/autocomplete/autocomplete.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 8
9 #include "base/basictypes.h" 9 #include "base/basictypes.h"
10 #include "base/command_line.h" 10 #include "base/command_line.h"
(...skipping 306 matching lines...) Expand 10 before | Expand all | Expand 10 after
317 ((registry_length != 0) && (host.find(' ') == string16::npos))) ? 317 ((registry_length != 0) && (host.find(' ') == string16::npos))) ?
318 UNKNOWN : QUERY; 318 UNKNOWN : QUERY;
319 } 319 }
320 320
321 // A port number is a good indicator that this is a URL. However, it might 321 // A port number is a good indicator that this is a URL. However, it might
322 // also be a query like "1.66:1" that looks kind of like an IP address and 322 // also be a query like "1.66:1" that looks kind of like an IP address and
323 // port number. So here we only check for "port numbers" that are illegal and 323 // port number. So here we only check for "port numbers" that are illegal and
324 // thus mean this can't be navigated to (e.g. "1.2.3.4:garbage"), and we save 324 // thus mean this can't be navigated to (e.g. "1.2.3.4:garbage"), and we save
325 // handling legal port numbers until after the "IP address" determination 325 // handling legal port numbers until after the "IP address" determination
326 // below. 326 // below.
327 if (parts->port.is_nonempty()) { 327 if (url_parse::ParsePort(text.c_str(), parts->port) ==
328 int port; 328 url_parse::PORT_INVALID)
329 if (!base::StringToInt(text.substr(parts->port.begin, parts->port.len), 329 return QUERY;
330 &port) ||
331 (port < 0) || (port > 65535))
332 return QUERY;
333 }
334 330
335 // Now that we've ruled out all schemes other than http or https and done a 331 // Now that we've ruled out all schemes other than http or https and done a
336 // little more sanity checking, the presence of a scheme means this is likely 332 // little more sanity checking, the presence of a scheme means this is likely
337 // a URL. 333 // a URL.
338 if (parts->scheme.is_nonempty()) 334 if (parts->scheme.is_nonempty())
339 return URL; 335 return URL;
340 336
341 // See if the host is an IP address. 337 // See if the host is an IP address.
342 if (host_info.family == url_canon::CanonHostInfo::IPV4) {
343 // If the user originally typed a host that looks like an IP address (a
344 // dotted quad), they probably want to open it. If the original input was
345 // something else (like a single number), they probably wanted to search for
346 // it, unless they explicitly typed a scheme. This is true even if the URL
347 // appears to have a path: "1.2/45" is more likely a search (for the answer
348 // to a math problem) than a URL.
349 if (host_info.num_ipv4_components == 4)
350 return URL;
351 return desired_tld.empty() ? UNKNOWN : REQUESTED_URL;
352 }
353 if (host_info.family == url_canon::CanonHostInfo::IPV6) 338 if (host_info.family == url_canon::CanonHostInfo::IPV6)
354 return URL; 339 return URL;
340 // If the user originally typed a host that looks like an IP address (a
341 // dotted quad), they probably want to open it. If the original input was
342 // something else (like a single number), they probably wanted to search for
343 // it, unless they explicitly typed a scheme. This is true even if the URL
344 // appears to have a path: "1.2/45" is more likely a search (for the answer
345 // to a math problem) than a URL. However, if there are more non-host
346 // components, then maybe this really was intended to be a navigation. For
347 // this reason we only check the dotted-quad case here, and save the "other
348 // IP addresses" case for after we check the number of non-host components
349 // below.
350 if ((host_info.family == url_canon::CanonHostInfo::IPV4) &&
351 (host_info.num_ipv4_components == 4))
352 return URL;
355 353
356 // Presence of a password means this is likely a URL. Note that unless the 354 // Presence of a password means this is likely a URL. Note that unless the
357 // user has typed an explicit "http://" or similar, we'll probably think that 355 // user has typed an explicit "http://" or similar, we'll probably think that
358 // the username is some unknown scheme, and bail out in the scheme-handling 356 // the username is some unknown scheme, and bail out in the scheme-handling
359 // code above. 357 // code above.
360 if (parts->password.is_nonempty()) 358 if (parts->password.is_nonempty())
361 return URL; 359 return URL;
362 360
363 // Trailing slashes force the input to be treated as a URL. 361 // Trailing slashes force the input to be treated as a URL.
364 if (parts->path.len == 1) 362 if (parts->path.len == 1)
365 return URL; 363 return URL;
366 364
367 // If we reach here with a username, but no port or path, our input looks like 365 // If there is more than one recognized non-host component, this is likely to
368 // "user@host". Because there is no scheme explicitly specified, we think 366 // be a URL, even if the TLD is unknown (in which case this is likely an
369 // this is more likely an email address than an HTTP auth attempt. Hence, we 367 // intranet URL).
370 // search by default and let users correct us on a case-by-case basis. 368 if (NumNonHostComponents(*parts) > 1)
371 if (parts->username.is_nonempty() && !parts->port.is_nonempty() && 369 return URL;
372 !parts->path.is_nonempty())
373 return UNKNOWN;
374 370
375 // If the host has a known TLD, it's probably a URL. Also special-case 371 // If the host has a known TLD, it's probably a URL, with the following
376 // "localhost" as a known hostname. 372 // exceptions:
377 if ((registry_length != 0) || (host == ASCIIToUTF16("localhost"))) 373 // * Any "IP addresses" that make it here are more likely searches
378 return URL; 374 // (see above).
375 // * If we reach here with a username, our input looks like "user@host[.tld]".
376 // Because there is no scheme explicitly specified, we think this is more
377 // likely an email address than an HTTP auth attempt. Hence, we search by
378 // default and let users correct us on a case-by-case basis.
379 // Note that we special-case "localhost" as a known hostname.
380 if ((host_info.family != url_canon::CanonHostInfo::IPV4) &&
381 ((registry_length != 0) || (host == ASCIIToUTF16("localhost"))))
382 return parts->username.is_nonempty() ? UNKNOWN : URL;
379 383
380 // If we reach this point, we know there's no known TLD on the input, so if 384 // If we reach this point, we know there's no known TLD on the input, so if
381 // the user wishes to add a desired_tld, the fixup code will oblige; thus this 385 // the user wishes to add a desired_tld, the fixup code will oblige; thus this
382 // is a URL. 386 // is a URL.
383 if (!desired_tld.empty()) 387 if (!desired_tld.empty())
384 return REQUESTED_URL; 388 return REQUESTED_URL;
385 389
386 // No scheme, username, password, port, path, and no known TLD on the host. 390 // No scheme, password, port, path, and no known TLD on the host.
387 // This could be: 391 // This could be:
392 // * An "incomplete IP address"; likely a search (see above).
393 // * An email-like input like "user@host", where "host" has no known TLD.
394 // It's not clear what the user means here and searching seems reasonable.
388 // * A single word "foo"; possibly an intranet site, but more likely a search. 395 // * A single word "foo"; possibly an intranet site, but more likely a search.
389 // This is ideally an UNKNOWN, and we can let the Alternate Nav URL code 396 // This is ideally an UNKNOWN, and we can let the Alternate Nav URL code
390 // catch our mistakes. 397 // catch our mistakes.
391 // * A URL with a valid TLD we don't know about yet. If e.g. a registrar adds 398 // * A URL with a valid TLD we don't know about yet. If e.g. a registrar adds
392 // "xxx" as a TLD, then until we add it to our data file, Chrome won't know 399 // "xxx" as a TLD, then until we add it to our data file, Chrome won't know
393 // "foo.xxx" is a real URL. So ideally this is a URL, but we can't really 400 // "foo.xxx" is a real URL. So ideally this is a URL, but we can't really
394 // distinguish this case from: 401 // distinguish this case from:
395 // * A "URL-like" string that's not really a URL (like 402 // * A "URL-like" string that's not really a URL (like
396 // "browser.tabs.closeButtons" or "java.awt.event.*"). This is ideally a 403 // "browser.tabs.closeButtons" or "java.awt.event.*"). This is ideally a
397 // QUERY. Since this is indistinguishable from the case above, and this 404 // QUERY. Since this is indistinguishable from the case above, and this
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after
449 if (!net::CanStripTrailingSlash(url)) 456 if (!net::CanStripTrailingSlash(url))
450 return formatted_url; 457 return formatted_url;
451 const string16 url_with_path(formatted_url + char16('/')); 458 const string16 url_with_path(formatted_url + char16('/'));
452 return (AutocompleteInput::Parse(formatted_url, string16(), NULL, NULL, 459 return (AutocompleteInput::Parse(formatted_url, string16(), NULL, NULL,
453 NULL) == 460 NULL) ==
454 AutocompleteInput::Parse(url_with_path, string16(), NULL, NULL, 461 AutocompleteInput::Parse(url_with_path, string16(), NULL, NULL,
455 NULL)) ? 462 NULL)) ?
456 formatted_url : url_with_path; 463 formatted_url : url_with_path;
457 } 464 }
458 465
466 // static
467 int AutocompleteInput::NumNonHostComponents(const url_parse::Parsed& parts) {
468 int num_nonhost_components = 0;
469 if (parts.scheme.is_nonempty())
470 ++num_nonhost_components;
471 if (parts.username.is_nonempty())
472 ++num_nonhost_components;
473 if (parts.password.is_nonempty())
474 ++num_nonhost_components;
475 if (parts.port.is_nonempty())
476 ++num_nonhost_components;
477 if (parts.path.is_nonempty())
478 ++num_nonhost_components;
479 if (parts.query.is_nonempty())
480 ++num_nonhost_components;
481 if (parts.ref.is_nonempty())
482 ++num_nonhost_components;
483 return num_nonhost_components;
484 }
485
459 void AutocompleteInput::UpdateText(const string16& text, 486 void AutocompleteInput::UpdateText(const string16& text,
460 const url_parse::Parsed& parts) { 487 const url_parse::Parsed& parts) {
461 text_ = text; 488 text_ = text;
462 parts_ = parts; 489 parts_ = parts;
463 } 490 }
464 491
465 bool AutocompleteInput::Equals(const AutocompleteInput& other) const { 492 bool AutocompleteInput::Equals(const AutocompleteInput& other) const {
466 return (text_ == other.text_) && 493 return (text_ == other.text_) &&
467 (type_ == other.type_) && 494 (type_ == other.type_) &&
468 (desired_tld_ == other.desired_tld_) && 495 (desired_tld_ == other.desired_tld_) &&
(...skipping 545 matching lines...) Expand 10 before | Expand all | Expand 10 after
1014 } 1041 }
1015 done_ = true; 1042 done_ = true;
1016 } 1043 }
1017 1044
1018 void AutocompleteController::StartExpireTimer() { 1045 void AutocompleteController::StartExpireTimer() {
1019 if (result_.HasCopiedMatches()) 1046 if (result_.HasCopiedMatches())
1020 expire_timer_.Start(FROM_HERE, 1047 expire_timer_.Start(FROM_HERE,
1021 base::TimeDelta::FromMilliseconds(kExpireTimeMS), 1048 base::TimeDelta::FromMilliseconds(kExpireTimeMS),
1022 this, &AutocompleteController::ExpireCopiedEntries); 1049 this, &AutocompleteController::ExpireCopiedEntries);
1023 } 1050 }
OLDNEW
« no previous file with comments | « chrome/browser/autocomplete/autocomplete.h ('k') | chrome/browser/autocomplete/autocomplete_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698