Index: components/omnibox/browser/scored_history_match.cc |
diff --git a/components/omnibox/browser/scored_history_match.cc b/components/omnibox/browser/scored_history_match.cc |
index bdde8e547430e18c6a9659cc48203cde566edf58..534b194eab14072354c7060533b30a0c9294dc61 100644 |
--- a/components/omnibox/browser/scored_history_match.cc |
+++ b/components/omnibox/browser/scored_history_match.cc |
@@ -109,6 +109,8 @@ float ScoredHistoryMatch::bookmark_value_; |
float ScoredHistoryMatch::typed_value_; |
bool ScoredHistoryMatch::fix_few_visits_bug_; |
bool ScoredHistoryMatch::frequency_uses_sum_; |
+OmniboxFieldTrial::NumMatchesScores* |
+ ScoredHistoryMatch::num_matches_to_document_specificity_score_ = nullptr; |
size_t ScoredHistoryMatch::max_visits_to_score_; |
bool ScoredHistoryMatch::allow_tld_matches_; |
bool ScoredHistoryMatch::allow_scheme_matches_; |
@@ -133,8 +135,8 @@ ScoredHistoryMatch::ScoredHistoryMatch() |
WordStarts(), |
RowWordStarts(), |
false, |
- base::Time::Max()) { |
-} |
+ 1, |
+ base::Time::Max()) {} |
ScoredHistoryMatch::ScoredHistoryMatch( |
const history::URLRow& row, |
@@ -144,6 +146,7 @@ ScoredHistoryMatch::ScoredHistoryMatch( |
const WordStarts& terms_to_word_starts_offsets, |
const RowWordStarts& word_starts, |
bool is_url_bookmarked, |
+ size_t num_matching_pages, |
base::Time now) |
: HistoryMatch(row, 0, false, false), raw_score(0) { |
// NOTE: Call Init() before doing any validity checking to ensure that the |
@@ -261,8 +264,11 @@ ScoredHistoryMatch::ScoredHistoryMatch( |
const float topicality_score = GetTopicalityScore( |
terms_vector.size(), url, terms_to_word_starts_offsets, word_starts); |
const float frequency_score = GetFrequency(now, is_url_bookmarked, visits); |
- raw_score = base::saturated_cast<int>(GetFinalRelevancyScore( |
- topicality_score, frequency_score, *hqp_relevance_buckets_)); |
+ const float specificity_score = |
+ GetDocumentSpecificityScore(num_matching_pages); |
+ raw_score = base::saturated_cast<int>( |
+ GetFinalRelevancyScore(topicality_score, frequency_score, |
+ specificity_score, *hqp_relevance_buckets_)); |
if (also_do_hup_like_scoring_ && likely_can_inline) { |
// HistoryURL-provider-like scoring gives any match that is |
@@ -412,6 +418,10 @@ void ScoredHistoryMatch::Init() { |
max_visits_to_score_ = OmniboxFieldTrial::HQPMaxVisitsToScore(); |
frequency_uses_sum_ = OmniboxFieldTrial::HQPFreqencyUsesSum(); |
fix_few_visits_bug_ = OmniboxFieldTrial::HQPFixFewVisitsBug(); |
+ num_matches_to_document_specificity_score_ = |
+ new OmniboxFieldTrial::NumMatchesScores(); |
+ (*num_matches_to_document_specificity_score_) = |
+ OmniboxFieldTrial::HQPNumMatchesScores(); |
allow_tld_matches_ = OmniboxFieldTrial::HQPAllowMatchInTLDValue(); |
allow_scheme_matches_ = OmniboxFieldTrial::HQPAllowMatchInSchemeValue(); |
num_title_words_to_allow_ = OmniboxFieldTrial::HQPNumTitleWordsToAllow(); |
@@ -616,18 +626,32 @@ float ScoredHistoryMatch::GetFrequency(const base::Time& now, |
ScoredHistoryMatch::max_visits_to_score_; |
} |
+float ScoredHistoryMatch::GetDocumentSpecificityScore( |
+ const size_t num_matching_pages) const { |
+  // NOTE(review): the floating point value below can matter: upper_bound() |
+  // compares pairs lexicographically, so when num_matching_pages exactly |
+  // equals a key in the map, the pair's second member breaks the tie. |
Peter Kasting
2016/12/06 05:19:30
I'm not sure that's true.
Since upper_bound() returns the first element greater than the supplied value, the pair's second member can break ties when the first members are equal.
Mark P
2016/12/08 00:21:31
Good point.  I never imagined a field trial specifying a score at exactly this value, but it's possible.
|
+ OmniboxFieldTrial::NumMatchesScores::const_iterator it = |
+ std::upper_bound(num_matches_to_document_specificity_score_->begin(), |
+ num_matches_to_document_specificity_score_->end(), |
+ std::pair<size_t, double>{num_matching_pages, 1.0}); |
+ if (it == num_matches_to_document_specificity_score_->end()) |
+ return 1.0; |
+ return it->second; |
Peter Kasting
2016/12/06 05:19:30
Nit: Could use ?:
Mark P
2016/12/08 00:21:31
Done (after reversing the order, as that read better to me).
Peter Kasting
2016/12/08 00:51:40
My concern with the order reversal is that it sort of hides which case is the default.
Mark P
2016/12/08 04:37:35
I understand the point, but I find != end to read more naturally here.
|
+} |
+ |
// static |
float ScoredHistoryMatch::GetFinalRelevancyScore( |
float topicality_score, |
float frequency_score, |
+ float specificity_score, |
const std::vector<ScoreMaxRelevance>& hqp_relevance_buckets) { |
DCHECK(hqp_relevance_buckets.size() > 0); |
DCHECK_EQ(hqp_relevance_buckets[0].first, 0.0); |
if (topicality_score == 0) |
return 0; |
- // Here's how to interpret intermediate_score: Suppose the omnibox |
- // has one input term. Suppose we have a URL for which the omnibox |
+ // Here's how to interpret intermediate_score: Suppose the omnibox has one |
+ // input term. Suppose the input matches many documents. (This implies |
+ // specificity_score == 1.0.) Suppose we have a URL for which the omnibox |
// input term has a single URL hostname hit at a word boundary. (This |
// implies topicality_score = 1.0.). Then the intermediate_score for |
// this URL will depend entirely on the frequency_score with |
@@ -651,7 +675,8 @@ float ScoredHistoryMatch::GetFinalRelevancyScore( |
// |
// The score maxes out at 1399 (i.e., cannot beat a good inlineable result |
// from HistoryURL provider). |
- const float intermediate_score = topicality_score * frequency_score; |
+ const float intermediate_score = |
+ topicality_score * frequency_score * specificity_score; |
// Find the threshold where intermediate score is greater than bucket. |
size_t i = 1; |