Chromium Code Reviews| Index: components/omnibox/browser/scored_history_match.cc |
| diff --git a/components/omnibox/browser/scored_history_match.cc b/components/omnibox/browser/scored_history_match.cc |
| index bdde8e547430e18c6a9659cc48203cde566edf58..428989ce7d6a1f0008836c894d077df5db732e53 100644 |
| --- a/components/omnibox/browser/scored_history_match.cc |
| +++ b/components/omnibox/browser/scored_history_match.cc |
| @@ -10,6 +10,7 @@ |
| #include <vector> |
| #include "base/logging.h" |
| +#include "base/macros.h" |
| #include "base/numerics/safe_conversions.h" |
| #include "base/strings/string_number_conversions.h" |
| #include "base/strings/string_split.h" |
| @@ -125,6 +126,10 @@ char ScoredHistoryMatch::hqp_relevance_buckets_str_[] = |
| std::vector<ScoredHistoryMatch::ScoreMaxRelevance>* |
| ScoredHistoryMatch::hqp_relevance_buckets_ = nullptr; |
| +OmniboxFieldTrial::NumMatchesScores* |
| + ScoredHistoryMatch::num_matches_to_document_specificity_score_override_ = |
| + nullptr; |
| + |
| ScoredHistoryMatch::ScoredHistoryMatch() |
| : ScoredHistoryMatch(history::URLRow(), |
| VisitInfoVector(), |
| @@ -133,8 +138,8 @@ ScoredHistoryMatch::ScoredHistoryMatch() |
| WordStarts(), |
| RowWordStarts(), |
| false, |
| - base::Time::Max()) { |
| -} |
| + 1, |
| + base::Time::Max()) {} |
| ScoredHistoryMatch::ScoredHistoryMatch( |
| const history::URLRow& row, |
| @@ -144,6 +149,7 @@ ScoredHistoryMatch::ScoredHistoryMatch( |
| const WordStarts& terms_to_word_starts_offsets, |
| const RowWordStarts& word_starts, |
| bool is_url_bookmarked, |
| + size_t num_matching_pages, |
| base::Time now) |
| : HistoryMatch(row, 0, false, false), raw_score(0) { |
| // NOTE: Call Init() before doing any validity checking to ensure that the |
| @@ -261,8 +267,11 @@ ScoredHistoryMatch::ScoredHistoryMatch( |
| const float topicality_score = GetTopicalityScore( |
| terms_vector.size(), url, terms_to_word_starts_offsets, word_starts); |
| const float frequency_score = GetFrequency(now, is_url_bookmarked, visits); |
| - raw_score = base::saturated_cast<int>(GetFinalRelevancyScore( |
| - topicality_score, frequency_score, *hqp_relevance_buckets_)); |
| + const float specificity_score = |
| + GetDocumentSpecificityScore(num_matching_pages); |
| + raw_score = base::saturated_cast<int>( |
| + GetFinalRelevancyScore(topicality_score, frequency_score, |
| + specificity_score, *hqp_relevance_buckets_)); |
| if (also_do_hup_like_scoring_ && likely_can_inline) { |
| // HistoryURL-provider-like scoring gives any match that is |
| @@ -616,18 +625,43 @@ float ScoredHistoryMatch::GetFrequency(const base::Time& now, |
| ScoredHistoryMatch::max_visits_to_score_; |
| } |
| +float ScoredHistoryMatch::GetDocumentSpecificityScore( |
| + size_t num_matching_pages) const { |
| + // A mapping from the number of matching pages to their associated document |
| + // specificity scores. See omnibox_field_trial.h for more details. |
| + CR_DEFINE_STATIC_LOCAL(OmniboxFieldTrial::NumMatchesScores, |
| + default_num_matches_to_document_specificity_score, |
| + (OmniboxFieldTrial::HQPNumMatchesScores())); |
| + OmniboxFieldTrial::NumMatchesScores* |
| + num_matches_to_document_specificity_score = |
| + num_matches_to_document_specificity_score_override_ |
| + ? num_matches_to_document_specificity_score_override_ |
| + : &default_num_matches_to_document_specificity_score; |
|
Peter Kasting
2016/12/10 02:22:26
I feel like there might be a way to shorten all th
Mark P
2016/12/11 05:11:37
I was wondering if there was a good way to shorten
|
| + |
| + // The -1 sentinel below must be less than the lowest score the server would |
| + // send down, so upper_bound() finds the first entry with >= this many matches. |
| + OmniboxFieldTrial::NumMatchesScores::const_iterator it = |
| + std::upper_bound(num_matches_to_document_specificity_score->begin(), |
| + num_matches_to_document_specificity_score->end(), |
| + std::pair<size_t, double>{num_matching_pages, -1}); |
| + return (it != num_matches_to_document_specificity_score->end()) ? it->second |
| + : 1.0; |
| +}; |
| + |
| // static |
| float ScoredHistoryMatch::GetFinalRelevancyScore( |
| float topicality_score, |
| float frequency_score, |
| + float specificity_score, |
| const std::vector<ScoreMaxRelevance>& hqp_relevance_buckets) { |
| DCHECK(hqp_relevance_buckets.size() > 0); |
| DCHECK_EQ(hqp_relevance_buckets[0].first, 0.0); |
| if (topicality_score == 0) |
| return 0; |
| - // Here's how to interpret intermediate_score: Suppose the omnibox |
| - // has one input term. Suppose we have a URL for which the omnibox |
| + // Here's how to interpret intermediate_score: Suppose the omnibox has one |
| + // input term. Suppose the input matches many documents. (This implies |
| + // specificity_score == 1.0.) Suppose we have a URL for which the omnibox |
| // input term has a single URL hostname hit at a word boundary. (This |
| // implies topicality_score == 1.0.) Then the intermediate_score for |
| // this URL will depend entirely on the frequency_score with |
| @@ -651,7 +685,8 @@ float ScoredHistoryMatch::GetFinalRelevancyScore( |
| // |
| // The score maxes out at 1399 (i.e., cannot beat a good inlineable result |
| // from HistoryURL provider). |
| - const float intermediate_score = topicality_score * frequency_score; |
| + const float intermediate_score = |
| + topicality_score * frequency_score * specificity_score; |
| // Find the threshold where intermediate score is greater than bucket. |
| size_t i = 1; |