Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(24)

Side by Side Diff: chrome/browser/autocomplete/search_provider.cc

Issue 7314018: Don't autocomplete searches of >1 word if they've only been visited once and the user has not yet... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: '' Created 9 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/browser/autocomplete/search_provider.h" 5 #include "chrome/browser/autocomplete/search_provider.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <cmath> 8 #include <cmath>
9 9
10 #include "base/callback.h" 10 #include "base/callback.h"
11 #include "base/i18n/break_iterator.h"
11 #include "base/i18n/case_conversion.h" 12 #include "base/i18n/case_conversion.h"
12 #include "base/i18n/icu_string_conversions.h" 13 #include "base/i18n/icu_string_conversions.h"
13 #include "base/message_loop.h" 14 #include "base/message_loop.h"
14 #include "base/string16.h" 15 #include "base/string16.h"
15 #include "base/utf_string_conversions.h" 16 #include "base/utf_string_conversions.h"
16 #include "chrome/browser/autocomplete/autocomplete_classifier.h" 17 #include "chrome/browser/autocomplete/autocomplete_classifier.h"
17 #include "chrome/browser/autocomplete/autocomplete_match.h" 18 #include "chrome/browser/autocomplete/autocomplete_match.h"
18 #include "chrome/browser/autocomplete/keyword_provider.h" 19 #include "chrome/browser/autocomplete/keyword_provider.h"
19 #include "chrome/browser/history/history.h" 20 #include "chrome/browser/history/history.h"
20 #include "chrome/browser/instant/instant_controller.h" 21 #include "chrome/browser/instant/instant_controller.h"
21 #include "chrome/browser/net/url_fixer_upper.h" 22 #include "chrome/browser/net/url_fixer_upper.h"
22 #include "chrome/browser/prefs/pref_service.h" 23 #include "chrome/browser/prefs/pref_service.h"
23 #include "chrome/browser/profiles/profile.h" 24 #include "chrome/browser/profiles/profile.h"
24 #include "chrome/browser/history/in_memory_database.h" 25 #include "chrome/browser/history/in_memory_database.h"
25 #include "chrome/browser/search_engines/template_url_service.h" 26 #include "chrome/browser/search_engines/template_url_service.h"
26 #include "chrome/browser/search_engines/template_url_service_factory.h" 27 #include "chrome/browser/search_engines/template_url_service_factory.h"
27 #include "chrome/common/pref_names.h" 28 #include "chrome/common/pref_names.h"
28 #include "chrome/common/url_constants.h" 29 #include "chrome/common/url_constants.h"
29 #include "content/common/json_value_serializer.h" 30 #include "content/common/json_value_serializer.h"
30 #include "googleurl/src/url_util.h" 31 #include "googleurl/src/url_util.h"
31 #include "grit/generated_resources.h" 32 #include "grit/generated_resources.h"
32 #include "net/base/escape.h" 33 #include "net/base/escape.h"
33 #include "net/http/http_response_headers.h" 34 #include "net/http/http_response_headers.h"
34 #include "net/url_request/url_request_status.h" 35 #include "net/url_request/url_request_status.h"
35 #include "ui/base/l10n/l10n_util.h" 36 #include "ui/base/l10n/l10n_util.h"
36 37
37 using base::Time; 38 using base::Time;
38 using base::TimeDelta; 39 using base::TimeDelta;
39 40
41 namespace {
42
43 bool HasMultipleWords(const string16& text) {
44 base::i18n::BreakIterator i(text, base::i18n::BreakIterator::BREAK_WORD);
45 bool found_word = false;
46 if (i.Init()) {
47 while (i.Advance()) {
48 if (i.IsWord()) {
49 if (found_word)
50 return true;
51 found_word = true;
52 }
53 }
54 }
55 return false;
56 }
57
58 };
59
40 // static 60 // static
41 const int SearchProvider::kDefaultProviderURLFetcherID = 1; 61 const int SearchProvider::kDefaultProviderURLFetcherID = 1;
42 // static 62 // static
43 const int SearchProvider::kKeywordProviderURLFetcherID = 2; 63 const int SearchProvider::kKeywordProviderURLFetcherID = 2;
44 64
45 // static 65 // static
46 bool SearchProvider::query_suggest_immediately_ = false; 66 bool SearchProvider::query_suggest_immediately_ = false;
47 67
48 void SearchProvider::Providers::Set(const TemplateURL* default_provider, 68 void SearchProvider::Providers::Set(const TemplateURL* default_provider,
49 const TemplateURL* keyword_provider) { 69 const TemplateURL* keyword_provider) {
(...skipping 130 matching lines...) Expand 10 before | Expand all | Expand 10 after
180 return; 200 return;
181 } 201 }
182 202
183 input_ = input; 203 input_ = input;
184 204
185 DoHistoryQuery(minimal_changes); 205 DoHistoryQuery(minimal_changes);
186 StartOrStopSuggestQuery(minimal_changes); 206 StartOrStopSuggestQuery(minimal_changes);
187 ConvertResultsToAutocompleteMatches(); 207 ConvertResultsToAutocompleteMatches();
188 } 208 }
189 209
210 class SearchProvider::CompareScoredTerms {
211 public:
212 bool operator()(const ScoredTerm& a, const ScoredTerm& b) {
213 // Sort in descending relevance order.
214 return a.second > b.second;
215 }
216 };
217
190 void SearchProvider::Run() { 218 void SearchProvider::Run() {
191 // Start a new request with the current input. 219 // Start a new request with the current input.
192 DCHECK(!done_); 220 DCHECK(!done_);
193 suggest_results_pending_ = 0; 221 suggest_results_pending_ = 0;
194 if (providers_.valid_suggest_for_keyword_provider()) { 222 if (providers_.valid_suggest_for_keyword_provider()) {
195 suggest_results_pending_++; 223 suggest_results_pending_++;
196 keyword_fetcher_.reset( 224 keyword_fetcher_.reset(
197 CreateSuggestFetcher(kKeywordProviderURLFetcherID, 225 CreateSuggestFetcher(kKeywordProviderURLFetcherID,
198 providers_.keyword_provider(), 226 providers_.keyword_provider(),
199 keyword_input_text_)); 227 keyword_input_text_));
(...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after
274 keyword_history_results_.clear(); 302 keyword_history_results_.clear();
275 default_history_results_.clear(); 303 default_history_results_.clear();
276 304
277 HistoryService* const history_service = 305 HistoryService* const history_service =
278 profile_->GetHistoryService(Profile::EXPLICIT_ACCESS); 306 profile_->GetHistoryService(Profile::EXPLICIT_ACCESS);
279 history::URLDatabase* url_db = history_service ? 307 history::URLDatabase* url_db = history_service ?
280 history_service->InMemoryDatabase() : NULL; 308 history_service->InMemoryDatabase() : NULL;
281 if (!url_db) 309 if (!url_db)
282 return; 310 return;
283 311
284 // Request history for both the keyword and default provider. 312 // Request history for both the keyword and default provider. We grab many
313 // more matches than we'll ultimately clamp to so that if there are several
314 // recent multi-word matches who scores are lowered (see
315 // AddHistoryResultsToMap()), they won't crowd out older, higher-scoring
316 // matches. Note that this doesn't fix the problem entirely, but merely
317 // limits it to cases with a very large number of such multi-word matches; for
318 // now, this seems OK compared with the complexity of a real fix, which would
319 // require multiple searches and tracking of "single- vs. multi-word" in the
320 // database.
321 int num_matches = kMaxMatches * 5;
285 if (providers_.valid_keyword_provider()) { 322 if (providers_.valid_keyword_provider()) {
286 url_db->GetMostRecentKeywordSearchTerms( 323 url_db->GetMostRecentKeywordSearchTerms(providers_.keyword_provider().id(),
287 providers_.keyword_provider().id(), 324 keyword_input_text_, num_matches, &keyword_history_results_);
288 keyword_input_text_,
289 static_cast<int>(kMaxMatches),
290 &keyword_history_results_);
291 } 325 }
292 if (providers_.valid_default_provider()) { 326 if (providers_.valid_default_provider()) {
293 url_db->GetMostRecentKeywordSearchTerms( 327 url_db->GetMostRecentKeywordSearchTerms(providers_.default_provider().id(),
294 providers_.default_provider().id(), 328 input_.text(), num_matches, &default_history_results_);
295 input_.text(),
296 static_cast<int>(kMaxMatches),
297 &default_history_results_);
298 } 329 }
299 } 330 }
300 331
301 void SearchProvider::StartOrStopSuggestQuery(bool minimal_changes) { 332 void SearchProvider::StartOrStopSuggestQuery(bool minimal_changes) {
302 // Don't send any queries to the server until some time has elapsed after 333 // Don't send any queries to the server until some time has elapsed after
303 // the last keypress, to avoid flooding the server with requests we are 334 // the last keypress, to avoid flooding the server with requests we are
304 // likely to end up throwing away anyway. 335 // likely to end up throwing away anyway.
305 static const int kQueryDelayMs = 200; 336 static const int kQueryDelayMs = 200;
306 337
307 if (!IsQuerySuitableForSuggest()) { 338 if (!IsQuerySuitableForSuggest()) {
(...skipping 272 matching lines...) Expand 10 before | Expand all | Expand 10 after
580 matches_.push_back(NavigationToMatch(navigation_results.front(), 611 matches_.push_back(NavigationToMatch(navigation_results.front(),
581 CalculateRelevanceForNavigation(num_results, 0, is_keyword), 612 CalculateRelevanceForNavigation(num_results, 0, is_keyword),
582 is_keyword)); 613 is_keyword));
583 } 614 }
584 } 615 }
585 616
586 void SearchProvider::AddHistoryResultsToMap(const HistoryResults& results, 617 void SearchProvider::AddHistoryResultsToMap(const HistoryResults& results,
587 bool is_keyword, 618 bool is_keyword,
588 int did_not_accept_suggestion, 619 int did_not_accept_suggestion,
589 MatchMap* map) { 620 MatchMap* map) {
590 int last_relevance = 0; 621 if (results.empty())
622 return;
623
624 bool base_prevent_inline_autocomplete =
625 (input_.type() == AutocompleteInput::URL) ||
626 input_.prevent_inline_autocomplete();
627 const string16& input_text(
628 is_keyword ? keyword_input_text_ : input_.text());
629 bool input_multiple_words = HasMultipleWords(input_text);
630
631 ScoredTerms scored_terms;
632 if (!base_prevent_inline_autocomplete && input_multiple_words) {
633 // ScoreHistoryTerms() allows autocompletion of multi-word, 1-visit queries
634 // if the input also has multiple words. But if we were already
635 // autocompleting a multi-word, multi-visit query, and the current input is
636 // still a prefix of it, then changing the autocompletion suddenly feels
637 // wrong. To detect this case, first score as if only one word has been
638 // typed, then check for a best result that is an autocompleted, multi-word
639 // query. If we find one, then just keep that score set.
640 scored_terms = ScoreHistoryTerms(results, base_prevent_inline_autocomplete,
641 false, input_text, is_keyword);
642 if ((scored_terms[0].second < AutocompleteResult::kLowestDefaultScore) ||
643 !HasMultipleWords(scored_terms[0].first))
644 scored_terms.clear(); // Didn't detect the case above, score normally.
645 }
646 if (scored_terms.empty())
647 scored_terms = ScoreHistoryTerms(results, base_prevent_inline_autocomplete,
648 input_multiple_words, input_text,
649 is_keyword);
650 for (ScoredTerms::const_iterator i(scored_terms.begin());
651 i != scored_terms.end(); ++i) {
652 AddMatchToMap(i->first, input_text, i->second,
653 AutocompleteMatch::SEARCH_HISTORY, did_not_accept_suggestion,
654 is_keyword, input_.prevent_inline_autocomplete(), map);
655 }
656 }
657
658 SearchProvider::ScoredTerms SearchProvider::ScoreHistoryTerms(
659 const HistoryResults& results,
660 bool base_prevent_inline_autocomplete,
661 bool input_multiple_words,
662 const string16& input_text,
663 bool is_keyword) {
591 AutocompleteClassifier* classifier = profile_->GetAutocompleteClassifier(); 664 AutocompleteClassifier* classifier = profile_->GetAutocompleteClassifier();
665 ScoredTerms scored_terms;
592 for (HistoryResults::const_iterator i(results.begin()); i != results.end(); 666 for (HistoryResults::const_iterator i(results.begin()); i != results.end();
593 ++i) { 667 ++i) {
594 // History returns results sorted for us. We force the relevance to decrease 668 // Don't autocomplete multi-word queries that have only been seen once
595 // so that the sort from history is honored. We should never end up with a 669 // unless the user has typed more than one word.
596 // match having a relevance greater than the previous, but they might be 670 bool prevent_inline_autocomplete = base_prevent_inline_autocomplete ||
597 // equal. If we didn't force the relevance to decrease and we ended up in a 671 (!input_multiple_words && (i->visits < 2) && HasMultipleWords(i->term));
598 // situation where the relevance was equal, then which was shown first would 672
599 // be random.
600 // This uses >= to handle the case where 3 or more results have the same
601 // relevance.
602 bool term_looks_like_url = false;
603 // Don't autocomplete search terms that would normally be treated as URLs 673 // Don't autocomplete search terms that would normally be treated as URLs
604 // when typed. For example, if the user searched for google.com and types 674 // when typed. For example, if the user searched for "google.com" and types
605 // goog, don't autocomplete to the search term google.com. Otherwise, the 675 // "goog", don't autocomplete to the search term "google.com". Otherwise,
606 // input will look like a URL but act like a search, which is confusing. 676 // the input will look like a URL but act like a search, which is confusing.
607 // NOTE: We don't check this in the following cases: 677 // NOTE: We don't check this in the following cases:
608 // * When inline autocomplete is disabled, we won't be inline 678 // * When inline autocomplete is disabled, we won't be inline
609 // autocompleting this term, so we don't need to worry about confusion as 679 // autocompleting this term, so we don't need to worry about confusion as
610 // much. This also prevents calling Classify() again from inside the 680 // much. This also prevents calling Classify() again from inside the
611 // classifier (which will corrupt state and likely crash), since the 681 // classifier (which will corrupt state and likely crash), since the
612 // classifier always disabled inline autocomplete. 682 // classifier always disables inline autocomplete.
613 // * When the user has typed the whole term, the "what you typed" history 683 // * When the user has typed the whole term, the "what you typed" history
614 // match will outrank us for URL-like inputs anyway, so we need not do 684 // match will outrank us for URL-like inputs anyway, so we need not do
615 // anything special. 685 // anything special.
616 if (!input_.prevent_inline_autocomplete() && classifier && 686 if (!prevent_inline_autocomplete && classifier && (i->term != input_text)) {
617 i->term != input_.text()) {
618 AutocompleteMatch match; 687 AutocompleteMatch match;
619 classifier->Classify(i->term, string16(), false, false, &match, NULL); 688 classifier->Classify(i->term, string16(), false, false, &match, NULL);
620 term_looks_like_url = match.transition == PageTransition::TYPED; 689 prevent_inline_autocomplete = match.transition == PageTransition::TYPED;
621 } 690 }
622 int relevance = CalculateRelevanceForHistory(i->time, term_looks_like_url, 691
623 is_keyword); 692 int relevance = CalculateRelevanceForHistory(i->time, is_keyword,
624 if (i != results.begin() && relevance >= last_relevance) 693 prevent_inline_autocomplete);
625 relevance = last_relevance - 1; 694 scored_terms.push_back(std::make_pair(i->term, relevance));
626 last_relevance = relevance;
627 AddMatchToMap(i->term,
628 is_keyword ? keyword_input_text_ : input_.text(),
629 relevance,
630 AutocompleteMatch::SEARCH_HISTORY, did_not_accept_suggestion,
631 is_keyword, input_.prevent_inline_autocomplete(),
632 map);
633 } 695 }
696
697 // History returns results sorted for us. However, we may have docked some
698 // results' scores, so things are no longer in order. Do a stable sort to get
699 // things back in order without otherwise disturbing results with equal
700 // scores, then force the scores to be unique, so that the order in which
701 // they're shown is deterministic.
702 std::stable_sort(scored_terms.begin(), scored_terms.end(),
703 CompareScoredTerms());
704 int last_relevance = 0;
705 for (ScoredTerms::iterator i(scored_terms.begin()); i != scored_terms.end();
706 ++i) {
707 if ((i != scored_terms.begin()) && (i->second >= last_relevance))
708 i->second = last_relevance - 1;
709 last_relevance = i->second;
710 }
711
712 return scored_terms;
634 } 713 }
635 714
636 void SearchProvider::AddSuggestResultsToMap( 715 void SearchProvider::AddSuggestResultsToMap(
637 const SuggestResults& suggest_results, 716 const SuggestResults& suggest_results,
638 bool is_keyword, 717 bool is_keyword,
639 int did_not_accept_suggestion, 718 int did_not_accept_suggestion,
640 MatchMap* map) { 719 MatchMap* map) {
641 for (size_t i = 0; i < suggest_results.size(); ++i) { 720 for (size_t i = 0; i < suggest_results.size(); ++i) {
642 AddMatchToMap(suggest_results[i], 721 AddMatchToMap(suggest_results[i],
643 is_keyword ? keyword_input_text_ : input_.text(), 722 is_keyword ? keyword_input_text_ : input_.text(),
(...skipping 20 matching lines...) Expand all
664 743
665 case AutocompleteInput::URL: 744 case AutocompleteInput::URL:
666 return 850; 745 return 850;
667 746
668 default: 747 default:
669 NOTREACHED(); 748 NOTREACHED();
670 return 0; 749 return 0;
671 } 750 }
672 } 751 }
673 752
674 int SearchProvider::CalculateRelevanceForHistory(const Time& time, 753 int SearchProvider::CalculateRelevanceForHistory(
675 bool looks_like_url, 754 const Time& time,
676 bool is_keyword) const { 755 bool is_keyword,
756 bool prevent_inline_autocomplete) const {
677 // The relevance of past searches falls off over time. There are two distinct 757 // The relevance of past searches falls off over time. There are two distinct
678 // equations used. If the first equation is used (searches to the primary 758 // equations used. If the first equation is used (searches to the primary
679 // provider with a type other than URL that don't autocomplete to a url) the 759 // provider that we want to inline autocomplete), the score starts at 1399 and
680 // score starts at 1399 and falls to 1300. If the second equation is used the 760 // falls to 1300. If the second equation is used the relevance of a search 15
681 // relevance of a search 15 minutes ago is discounted about 50 points, while 761 // minutes ago is discounted 50 points, while the relevance of a search two
682 // the relevance of a search two weeks ago is discounted about 450 points. 762 // weeks ago is discounted 450 points.
683 double elapsed_time = std::max((Time::Now() - time).InSecondsF(), 0.); 763 double elapsed_time = std::max((Time::Now() - time).InSecondsF(), 0.);
684 764 bool is_primary_provider = providers_.is_primary_provider(is_keyword);
685 if (providers_.is_primary_provider(is_keyword) && 765 if (is_primary_provider && !prevent_inline_autocomplete) {
686 input_.type() != AutocompleteInput::URL &&
687 !input_.prevent_inline_autocomplete() && !looks_like_url) {
688 // Searches with the past two days get a different curve. 766 // Searches with the past two days get a different curve.
689 const double autocomplete_time= 2 * 24 * 60 * 60; 767 const double autocomplete_time = 2 * 24 * 60 * 60;
690 if (elapsed_time < autocomplete_time) { 768 if (elapsed_time < autocomplete_time) {
691 return (is_keyword ? 1599 : 1399) - static_cast<int>(99 * 769 return (is_keyword ? 1599 : 1399) - static_cast<int>(99 *
692 std::pow(elapsed_time / autocomplete_time, 2.5)); 770 std::pow(elapsed_time / autocomplete_time, 2.5));
693 } 771 }
694 elapsed_time -= autocomplete_time; 772 elapsed_time -= autocomplete_time;
695 } 773 }
696 774
697 const int score_discount = 775 const int score_discount =
698 static_cast<int>(6.5 * std::pow(elapsed_time, 0.3)); 776 static_cast<int>(6.5 * std::pow(elapsed_time, 0.3));
699 777
700 // Don't let scores go below 0. Negative relevance scores are meaningful in 778 // Don't let scores go below 0. Negative relevance scores are meaningful in
701 // a different way. 779 // a different way.
702 int base_score; 780 int base_score;
703 if (!providers_.is_primary_provider(is_keyword)) 781 if (is_primary_provider)
782 base_score = (input_.type() == AutocompleteInput::URL) ? 750 : 1050;
783 else
704 base_score = 200; 784 base_score = 200;
705 else
706 base_score = (input_.type() == AutocompleteInput::URL) ? 750 : 1050;
707 return std::max(0, base_score - score_discount); 785 return std::max(0, base_score - score_discount);
708 } 786 }
709 787
710 int SearchProvider::CalculateRelevanceForSuggestion(size_t num_results, 788 int SearchProvider::CalculateRelevanceForSuggestion(size_t num_results,
711 size_t result_number, 789 size_t result_number,
712 bool is_keyword) const { 790 bool is_keyword) const {
713 DCHECK(result_number < num_results); 791 DCHECK(result_number < num_results);
714 int base_score; 792 int base_score;
715 if (!providers_.is_primary_provider(is_keyword)) 793 if (!providers_.is_primary_provider(is_keyword))
716 base_score = 100; 794 base_score = 100;
(...skipping 151 matching lines...) Expand 10 before | Expand all | Expand 10 after
868 946
869 return match; 947 return match;
870 } 948 }
871 949
872 void SearchProvider::UpdateDone() { 950 void SearchProvider::UpdateDone() {
873 // We're done when there are no more suggest queries pending (this is set to 1 951 // We're done when there are no more suggest queries pending (this is set to 1
874 // when the timer is started) and we're not waiting on instant. 952 // when the timer is started) and we're not waiting on instant.
875 done_ = ((suggest_results_pending_ == 0) && 953 done_ = ((suggest_results_pending_ == 0) &&
876 (instant_finalized_ || !InstantController::IsEnabled(profile_))); 954 (instant_finalized_ || !InstantController::IsEnabled(profile_)));
877 } 955 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698