Index: chrome/browser/autocomplete/search_provider.cc |
=================================================================== |
--- chrome/browser/autocomplete/search_provider.cc (revision 92729) |
+++ chrome/browser/autocomplete/search_provider.cc (working copy) |
@@ -8,6 +8,7 @@ |
#include <cmath> |
#include "base/callback.h" |
+#include "base/i18n/break_iterator.h" |
#include "base/i18n/case_conversion.h" |
#include "base/i18n/icu_string_conversions.h" |
#include "base/message_loop.h" |
@@ -37,6 +38,25 @@ |
using base::Time; |
using base::TimeDelta; |
+namespace { |
+ |
+bool HasMultipleWords(const string16& text) { |
+ base::i18n::BreakIterator i(text, base::i18n::BreakIterator::BREAK_WORD); |
+ bool found_word = false; |
+ if (i.Init()) { |
+ while (i.Advance()) { |
+ if (i.IsWord()) { |
+ if (found_word) |
+ return true; |
+ found_word = true; |
+ } |
+ } |
+ } |
+ return false; |
+} |
+ |
+}; |
+ |
// static |
const int SearchProvider::kDefaultProviderURLFetcherID = 1; |
// static |
@@ -187,6 +207,14 @@ |
ConvertResultsToAutocompleteMatches(); |
} |
+class SearchProvider::CompareScoredTerms { |
+ public: |
+ bool operator()(const ScoredTerm& a, const ScoredTerm& b) { |
+ // Sort in descending relevance order. |
+ return a.second > b.second; |
+ } |
+}; |
+ |
void SearchProvider::Run() { |
// Start a new request with the current input. |
DCHECK(!done_); |
@@ -281,20 +309,23 @@ |
if (!url_db) |
return; |
- // Request history for both the keyword and default provider. |
+ // Request history for both the keyword and default provider. We grab many |
+ // more matches than we'll ultimately clamp to so that if there are several |
+ // recent multi-word matches who scores are lowered (see |
+ // AddHistoryResultsToMap()), they won't crowd out older, higher-scoring |
+ // matches. Note that this doesn't fix the problem entirely, but merely |
+ // limits it to cases with a very large number of such multi-word matches; for |
+ // now, this seems OK compared with the complexity of a real fix, which would |
+ // require multiple searches and tracking of "single- vs. multi-word" in the |
+ // database. |
+ int num_matches = kMaxMatches * 5; |
if (providers_.valid_keyword_provider()) { |
- url_db->GetMostRecentKeywordSearchTerms( |
- providers_.keyword_provider().id(), |
- keyword_input_text_, |
- static_cast<int>(kMaxMatches), |
- &keyword_history_results_); |
+ url_db->GetMostRecentKeywordSearchTerms(providers_.keyword_provider().id(), |
+ keyword_input_text_, num_matches, &keyword_history_results_); |
} |
if (providers_.valid_default_provider()) { |
- url_db->GetMostRecentKeywordSearchTerms( |
- providers_.default_provider().id(), |
- input_.text(), |
- static_cast<int>(kMaxMatches), |
- &default_history_results_); |
+ url_db->GetMostRecentKeywordSearchTerms(providers_.default_provider().id(), |
+ input_.text(), num_matches, &default_history_results_); |
} |
} |
@@ -587,50 +618,98 @@ |
bool is_keyword, |
int did_not_accept_suggestion, |
MatchMap* map) { |
- int last_relevance = 0; |
+ if (results.empty()) |
+ return; |
+ |
+ bool base_prevent_inline_autocomplete = |
+ (input_.type() == AutocompleteInput::URL) || |
+ input_.prevent_inline_autocomplete(); |
+ const string16& input_text( |
+ is_keyword ? keyword_input_text_ : input_.text()); |
+ bool input_multiple_words = HasMultipleWords(input_text); |
+ |
+ ScoredTerms scored_terms; |
+ if (!base_prevent_inline_autocomplete && input_multiple_words) { |
+ // ScoreHistoryTerms() allows autocompletion of multi-word, 1-visit queries |
+ // if the input also has multiple words. But if we were already |
+ // autocompleting a multi-word, multi-visit query, and the current input is |
+ // still a prefix of it, then changing the autocompletion suddenly feels |
+ // wrong. To detect this case, first score as if only one word has been |
+ // typed, then check for a best result that is an autocompleted, multi-word |
+ // query. If we find one, then just keep that score set. |
+ scored_terms = ScoreHistoryTerms(results, base_prevent_inline_autocomplete, |
+ false, input_text, is_keyword); |
+ if ((scored_terms[0].second < AutocompleteResult::kLowestDefaultScore) || |
+ !HasMultipleWords(scored_terms[0].first)) |
+ scored_terms.clear(); // Didn't detect the case above, score normally. |
+ } |
+ if (scored_terms.empty()) |
+ scored_terms = ScoreHistoryTerms(results, base_prevent_inline_autocomplete, |
+ input_multiple_words, input_text, |
+ is_keyword); |
+ for (ScoredTerms::const_iterator i(scored_terms.begin()); |
+ i != scored_terms.end(); ++i) { |
+ AddMatchToMap(i->first, input_text, i->second, |
+ AutocompleteMatch::SEARCH_HISTORY, did_not_accept_suggestion, |
+ is_keyword, input_.prevent_inline_autocomplete(), map); |
+ } |
+} |
+ |
+SearchProvider::ScoredTerms SearchProvider::ScoreHistoryTerms( |
+ const HistoryResults& results, |
+ bool base_prevent_inline_autocomplete, |
+ bool input_multiple_words, |
+ const string16& input_text, |
+ bool is_keyword) { |
AutocompleteClassifier* classifier = profile_->GetAutocompleteClassifier(); |
+ ScoredTerms scored_terms; |
for (HistoryResults::const_iterator i(results.begin()); i != results.end(); |
++i) { |
- // History returns results sorted for us. We force the relevance to decrease |
- // so that the sort from history is honored. We should never end up with a |
- // match having a relevance greater than the previous, but they might be |
- // equal. If we didn't force the relevance to decrease and we ended up in a |
- // situation where the relevance was equal, then which was shown first would |
- // be random. |
- // This uses >= to handle the case where 3 or more results have the same |
- // relevance. |
- bool term_looks_like_url = false; |
+ // Don't autocomplete multi-word queries that have only been seen once |
+ // unless the user has typed more than one word. |
+ bool prevent_inline_autocomplete = base_prevent_inline_autocomplete || |
+ (!input_multiple_words && (i->visits < 2) && HasMultipleWords(i->term)); |
+ |
// Don't autocomplete search terms that would normally be treated as URLs |
- // when typed. For example, if the user searched for google.com and types |
- // goog, don't autocomplete to the search term google.com. Otherwise, the |
- // input will look like a URL but act like a search, which is confusing. |
+ // when typed. For example, if the user searched for "google.com" and types |
+ // "goog", don't autocomplete to the search term "google.com". Otherwise, |
+ // the input will look like a URL but act like a search, which is confusing. |
// NOTE: We don't check this in the following cases: |
// * When inline autocomplete is disabled, we won't be inline |
// autocompleting this term, so we don't need to worry about confusion as |
// much. This also prevents calling Classify() again from inside the |
// classifier (which will corrupt state and likely crash), since the |
- // classifier always disabled inline autocomplete. |
+ // classifier always disables inline autocomplete. |
// * When the user has typed the whole term, the "what you typed" history |
// match will outrank us for URL-like inputs anyway, so we need not do |
// anything special. |
- if (!input_.prevent_inline_autocomplete() && classifier && |
- i->term != input_.text()) { |
+ if (!prevent_inline_autocomplete && classifier && (i->term != input_text)) { |
AutocompleteMatch match; |
classifier->Classify(i->term, string16(), false, false, &match, NULL); |
- term_looks_like_url = match.transition == PageTransition::TYPED; |
+ prevent_inline_autocomplete = match.transition == PageTransition::TYPED; |
} |
- int relevance = CalculateRelevanceForHistory(i->time, term_looks_like_url, |
- is_keyword); |
- if (i != results.begin() && relevance >= last_relevance) |
- relevance = last_relevance - 1; |
- last_relevance = relevance; |
- AddMatchToMap(i->term, |
- is_keyword ? keyword_input_text_ : input_.text(), |
- relevance, |
- AutocompleteMatch::SEARCH_HISTORY, did_not_accept_suggestion, |
- is_keyword, input_.prevent_inline_autocomplete(), |
- map); |
+ |
+ int relevance = CalculateRelevanceForHistory(i->time, is_keyword, |
+ prevent_inline_autocomplete); |
+ scored_terms.push_back(std::make_pair(i->term, relevance)); |
} |
+ |
+ // History returns results sorted for us. However, we may have docked some |
+ // results' scores, so things are no longer in order. Do a stable sort to get |
+ // things back in order without otherwise disturbing results with equal |
+ // scores, then force the scores to be unique, so that the order in which |
+ // they're shown is deterministic. |
+ std::stable_sort(scored_terms.begin(), scored_terms.end(), |
+ CompareScoredTerms()); |
+ int last_relevance = 0; |
+ for (ScoredTerms::iterator i(scored_terms.begin()); i != scored_terms.end(); |
+ ++i) { |
+ if ((i != scored_terms.begin()) && (i->second >= last_relevance)) |
+ i->second = last_relevance - 1; |
+ last_relevance = i->second; |
+ } |
+ |
+ return scored_terms; |
} |
void SearchProvider::AddSuggestResultsToMap( |
@@ -671,22 +750,21 @@ |
} |
} |
-int SearchProvider::CalculateRelevanceForHistory(const Time& time, |
- bool looks_like_url, |
- bool is_keyword) const { |
+int SearchProvider::CalculateRelevanceForHistory( |
+ const Time& time, |
+ bool is_keyword, |
+ bool prevent_inline_autocomplete) const { |
// The relevance of past searches falls off over time. There are two distinct |
// equations used. If the first equation is used (searches to the primary |
- // provider with a type other than URL that don't autocomplete to a url) the |
- // score starts at 1399 and falls to 1300. If the second equation is used the |
- // relevance of a search 15 minutes ago is discounted about 50 points, while |
- // the relevance of a search two weeks ago is discounted about 450 points. |
+ // provider that we want to inline autocomplete), the score starts at 1399 and |
+ // falls to 1300. If the second equation is used the relevance of a search 15 |
+ // minutes ago is discounted 50 points, while the relevance of a search two |
+ // weeks ago is discounted 450 points. |
double elapsed_time = std::max((Time::Now() - time).InSecondsF(), 0.); |
- |
- if (providers_.is_primary_provider(is_keyword) && |
- input_.type() != AutocompleteInput::URL && |
- !input_.prevent_inline_autocomplete() && !looks_like_url) { |
+ bool is_primary_provider = providers_.is_primary_provider(is_keyword); |
+ if (is_primary_provider && !prevent_inline_autocomplete) { |
// Searches with the past two days get a different curve. |
- const double autocomplete_time= 2 * 24 * 60 * 60; |
+ const double autocomplete_time = 2 * 24 * 60 * 60; |
if (elapsed_time < autocomplete_time) { |
return (is_keyword ? 1599 : 1399) - static_cast<int>(99 * |
std::pow(elapsed_time / autocomplete_time, 2.5)); |
@@ -700,10 +778,10 @@ |
// Don't let scores go below 0. Negative relevance scores are meaningful in |
// a different way. |
int base_score; |
- if (!providers_.is_primary_provider(is_keyword)) |
+ if (is_primary_provider) |
+ base_score = (input_.type() == AutocompleteInput::URL) ? 750 : 1050; |
+ else |
base_score = 200; |
- else |
- base_score = (input_.type() == AutocompleteInput::URL) ? 750 : 1050; |
return std::max(0, base_score - score_discount); |
} |