Chromium Code Reviews| Index: components/omnibox/browser/scored_history_match.cc |
| diff --git a/components/omnibox/browser/scored_history_match.cc b/components/omnibox/browser/scored_history_match.cc |
| index bdde8e547430e18c6a9659cc48203cde566edf58..534b194eab14072354c7060533b30a0c9294dc61 100644 |
| --- a/components/omnibox/browser/scored_history_match.cc |
| +++ b/components/omnibox/browser/scored_history_match.cc |
| @@ -109,6 +109,8 @@ float ScoredHistoryMatch::bookmark_value_; |
| float ScoredHistoryMatch::typed_value_; |
| bool ScoredHistoryMatch::fix_few_visits_bug_; |
| bool ScoredHistoryMatch::frequency_uses_sum_; |
| +OmniboxFieldTrial::NumMatchesScores* |
| + ScoredHistoryMatch::num_matches_to_document_specificity_score_ = nullptr; |
| size_t ScoredHistoryMatch::max_visits_to_score_; |
| bool ScoredHistoryMatch::allow_tld_matches_; |
| bool ScoredHistoryMatch::allow_scheme_matches_; |
| @@ -133,8 +135,8 @@ ScoredHistoryMatch::ScoredHistoryMatch() |
| WordStarts(), |
| RowWordStarts(), |
| false, |
| - base::Time::Max()) { |
| -} |
| + 1, |
| + base::Time::Max()) {} |
| ScoredHistoryMatch::ScoredHistoryMatch( |
| const history::URLRow& row, |
| @@ -144,6 +146,7 @@ ScoredHistoryMatch::ScoredHistoryMatch( |
| const WordStarts& terms_to_word_starts_offsets, |
| const RowWordStarts& word_starts, |
| bool is_url_bookmarked, |
| + size_t num_matching_pages, |
| base::Time now) |
| : HistoryMatch(row, 0, false, false), raw_score(0) { |
| // NOTE: Call Init() before doing any validity checking to ensure that the |
| @@ -261,8 +264,11 @@ ScoredHistoryMatch::ScoredHistoryMatch( |
| const float topicality_score = GetTopicalityScore( |
| terms_vector.size(), url, terms_to_word_starts_offsets, word_starts); |
| const float frequency_score = GetFrequency(now, is_url_bookmarked, visits); |
| - raw_score = base::saturated_cast<int>(GetFinalRelevancyScore( |
| - topicality_score, frequency_score, *hqp_relevance_buckets_)); |
| + const float specificity_score = |
| + GetDocumentSpecificityScore(num_matching_pages); |
| + raw_score = base::saturated_cast<int>( |
| + GetFinalRelevancyScore(topicality_score, frequency_score, |
| + specificity_score, *hqp_relevance_buckets_)); |
| if (also_do_hup_like_scoring_ && likely_can_inline) { |
| // HistoryURL-provider-like scoring gives any match that is |
| @@ -412,6 +418,10 @@ void ScoredHistoryMatch::Init() { |
| max_visits_to_score_ = OmniboxFieldTrial::HQPMaxVisitsToScore(); |
| frequency_uses_sum_ = OmniboxFieldTrial::HQPFreqencyUsesSum(); |
| fix_few_visits_bug_ = OmniboxFieldTrial::HQPFixFewVisitsBug(); |
| + num_matches_to_document_specificity_score_ = |
| + new OmniboxFieldTrial::NumMatchesScores(); |
| + (*num_matches_to_document_specificity_score_) = |
| + OmniboxFieldTrial::HQPNumMatchesScores(); |
| allow_tld_matches_ = OmniboxFieldTrial::HQPAllowMatchInTLDValue(); |
| allow_scheme_matches_ = OmniboxFieldTrial::HQPAllowMatchInSchemeValue(); |
| num_title_words_to_allow_ = OmniboxFieldTrial::HQPNumTitleWordsToAllow(); |
| @@ -616,18 +626,32 @@ float ScoredHistoryMatch::GetFrequency(const base::Time& now, |
| ScoredHistoryMatch::max_visits_to_score_; |
| } |
| +float ScoredHistoryMatch::GetDocumentSpecificityScore( |
| + const size_t num_matching_pages) const { |
| + // The floating point value below doesn't matter. |
|
Peter Kasting
2016/12/06 05:19:30
I'm not sure that's true.
Since upper_bound() returns the first element greater than the given value, and std::pair compares lexicographically, the 1.0 in the search pair can determine which entry is returned whenever an entry's first element equals num_matching_pages.
Mark P
2016/12/08 00:21:31
Good point.  I never imagined a field trial specifying a score that would make this value matter, but it is possible.
|
| + OmniboxFieldTrial::NumMatchesScores::const_iterator it = |
| + std::upper_bound(num_matches_to_document_specificity_score_->begin(), |
| + num_matches_to_document_specificity_score_->end(), |
| + std::pair<size_t, double>{num_matching_pages, 1.0}); |
| + if (it == num_matches_to_document_specificity_score_->end()) |
| + return 1.0; |
| + return it->second; |
|
Peter Kasting
2016/12/06 05:19:30
Nit: Could use ?:
Mark P
2016/12/08 00:21:31
Done (after reversing the order, as that read better).
Peter Kasting
2016/12/08 00:51:40
My concern with the order reversal is that it sort of inverts the conventional early-exit check against end().
Mark P
2016/12/08 04:37:35
I understand the point, but I find "!= end" to read more naturally here.
|
| +}; |
| + |
| // static |
| float ScoredHistoryMatch::GetFinalRelevancyScore( |
| float topicality_score, |
| float frequency_score, |
| + float specificity_score, |
| const std::vector<ScoreMaxRelevance>& hqp_relevance_buckets) { |
| DCHECK(hqp_relevance_buckets.size() > 0); |
| DCHECK_EQ(hqp_relevance_buckets[0].first, 0.0); |
| if (topicality_score == 0) |
| return 0; |
| - // Here's how to interpret intermediate_score: Suppose the omnibox |
| - // has one input term. Suppose we have a URL for which the omnibox |
| + // Here's how to interpret intermediate_score: Suppose the omnibox has one |
| + // input term. Suppose the input matches many documents. (This implies |
| + // specificity_score == 1.0.) Suppose we have a URL for which the omnibox |
| // input term has a single URL hostname hit at a word boundary. (This |
| // implies topicality_score = 1.0.). Then the intermediate_score for |
| // this URL will depend entirely on the frequency_score with |
| @@ -651,7 +675,8 @@ float ScoredHistoryMatch::GetFinalRelevancyScore( |
| // |
| // The score maxes out at 1399 (i.e., cannot beat a good inlineable result |
| // from HistoryURL provider). |
| - const float intermediate_score = topicality_score * frequency_score; |
| + const float intermediate_score = |
| + topicality_score * frequency_score * specificity_score; |
| // Find the threshold where intermediate score is greater than bucket. |
| size_t i = 1; |