Chromium Code Reviews| Index: components/omnibox/browser/scored_history_match.cc |
| diff --git a/components/omnibox/browser/scored_history_match.cc b/components/omnibox/browser/scored_history_match.cc |
| index bdde8e547430e18c6a9659cc48203cde566edf58..534b194eab14072354c7060533b30a0c9294dc61 100644 |
| --- a/components/omnibox/browser/scored_history_match.cc |
| +++ b/components/omnibox/browser/scored_history_match.cc |
| @@ -109,6 +109,8 @@ float ScoredHistoryMatch::bookmark_value_; |
| float ScoredHistoryMatch::typed_value_; |
| bool ScoredHistoryMatch::fix_few_visits_bug_; |
| bool ScoredHistoryMatch::frequency_uses_sum_; |
| +OmniboxFieldTrial::NumMatchesScores* |
| + ScoredHistoryMatch::num_matches_to_document_specificity_score_ = nullptr; |
| size_t ScoredHistoryMatch::max_visits_to_score_; |
| bool ScoredHistoryMatch::allow_tld_matches_; |
| bool ScoredHistoryMatch::allow_scheme_matches_; |
| @@ -133,8 +135,8 @@ ScoredHistoryMatch::ScoredHistoryMatch() |
| WordStarts(), |
| RowWordStarts(), |
| false, |
| - base::Time::Max()) { |
| -} |
| + 1, |
| + base::Time::Max()) {} |
| ScoredHistoryMatch::ScoredHistoryMatch( |
| const history::URLRow& row, |
| @@ -144,6 +146,7 @@ ScoredHistoryMatch::ScoredHistoryMatch( |
| const WordStarts& terms_to_word_starts_offsets, |
| const RowWordStarts& word_starts, |
| bool is_url_bookmarked, |
| + size_t num_matching_pages, |
| base::Time now) |
| : HistoryMatch(row, 0, false, false), raw_score(0) { |
| // NOTE: Call Init() before doing any validity checking to ensure that the |
| @@ -261,8 +264,11 @@ ScoredHistoryMatch::ScoredHistoryMatch( |
| const float topicality_score = GetTopicalityScore( |
| terms_vector.size(), url, terms_to_word_starts_offsets, word_starts); |
| const float frequency_score = GetFrequency(now, is_url_bookmarked, visits); |
| - raw_score = base::saturated_cast<int>(GetFinalRelevancyScore( |
| - topicality_score, frequency_score, *hqp_relevance_buckets_)); |
| + const float specificity_score = |
| + GetDocumentSpecificityScore(num_matching_pages); |
| + raw_score = base::saturated_cast<int>( |
| + GetFinalRelevancyScore(topicality_score, frequency_score, |
| + specificity_score, *hqp_relevance_buckets_)); |
| if (also_do_hup_like_scoring_ && likely_can_inline) { |
| // HistoryURL-provider-like scoring gives any match that is |
| @@ -412,6 +418,10 @@ void ScoredHistoryMatch::Init() { |
| max_visits_to_score_ = OmniboxFieldTrial::HQPMaxVisitsToScore(); |
| frequency_uses_sum_ = OmniboxFieldTrial::HQPFreqencyUsesSum(); |
| fix_few_visits_bug_ = OmniboxFieldTrial::HQPFixFewVisitsBug(); |
| + num_matches_to_document_specificity_score_ = |
| + new OmniboxFieldTrial::NumMatchesScores(); |
| + (*num_matches_to_document_specificity_score_) = |
| + OmniboxFieldTrial::HQPNumMatchesScores(); |
| allow_tld_matches_ = OmniboxFieldTrial::HQPAllowMatchInTLDValue(); |
| allow_scheme_matches_ = OmniboxFieldTrial::HQPAllowMatchInSchemeValue(); |
| num_title_words_to_allow_ = OmniboxFieldTrial::HQPNumTitleWordsToAllow(); |
| @@ -616,18 +626,32 @@ float ScoredHistoryMatch::GetFrequency(const base::Time& now, |
| ScoredHistoryMatch::max_visits_to_score_; |
| } |
| +float ScoredHistoryMatch::GetDocumentSpecificityScore( |
| + const size_t num_matching_pages) const { |
| + // The floating point value below doesn't matter. |
|
Peter Kasting
2016/12/06 05:19:30
I'm not sure that's true.
Since upper_bound() returns the first element greater than the given value, and std::pair compares lexicographically, the 1.0 in the search pair can determine which entry is returned whenever an entry's first element equals num_matching_pages.
Mark P
2016/12/08 00:21:31
Good point.  I never imagined a field trial specifying a score that would make this value matter, but it is possible.
|
| + OmniboxFieldTrial::NumMatchesScores::const_iterator it = |
| + std::upper_bound(num_matches_to_document_specificity_score_->begin(), |
| + num_matches_to_document_specificity_score_->end(), |
| + std::pair<size_t, double>{num_matching_pages, 1.0}); |
| + if (it == num_matches_to_document_specificity_score_->end()) |
| + return 1.0; |
| + return it->second; |
|
Peter Kasting
2016/12/06 05:19:30
Nit: Could use ?:
Mark P
2016/12/08 00:21:31
Done (after reversing the order, as that read better).
Peter Kasting
2016/12/08 00:51:40
My concern with the order reversal is that it sort of inverts the conventional early-exit check against end().
Mark P
2016/12/08 04:37:35
I understand the point, but I find "!= end" to read more naturally here.
|
| +}; |
| + |
| // static |
| float ScoredHistoryMatch::GetFinalRelevancyScore( |
| float topicality_score, |
| float frequency_score, |
| + float specificity_score, |
| const std::vector<ScoreMaxRelevance>& hqp_relevance_buckets) { |
| DCHECK(hqp_relevance_buckets.size() > 0); |
| DCHECK_EQ(hqp_relevance_buckets[0].first, 0.0); |
| if (topicality_score == 0) |
| return 0; |
| - // Here's how to interpret intermediate_score: Suppose the omnibox |
| - // has one input term. Suppose we have a URL for which the omnibox |
| + // Here's how to interpret intermediate_score: Suppose the omnibox has one |
| + // input term. Suppose the input matches many documents. (This implies |
| + // specificity_score == 1.0.) Suppose we have a URL for which the omnibox |
| // input term has a single URL hostname hit at a word boundary. (This |
| // implies topicality_score = 1.0.). Then the intermediate_score for |
| // this URL will depend entirely on the frequency_score with |
| @@ -651,7 +675,8 @@ float ScoredHistoryMatch::GetFinalRelevancyScore( |
| // |
| // The score maxes out at 1399 (i.e., cannot beat a good inlineable result |
| // from HistoryURL provider). |
| - const float intermediate_score = topicality_score * frequency_score; |
| + const float intermediate_score = |
| + topicality_score * frequency_score * specificity_score; |
| // Find the threshold where intermediate score is greater than bucket. |
| size_t i = 1; |