chrome/browser/history/scored_history_match.cc - Issue 905023003: Adding knobs on HQP provider.

Unified Diff: chrome/browser/history/scored_history_match.cc

Issue 905023003: Adding knobs on HQP provider. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Initial Change to control HQP scoring. Created 5 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« chrome/browser/history/scored_history_match.h ('K') | « chrome/browser/history/scored_history_match.h ('k') | chrome/browser/history/scored_history_match_unittest.cc » ('j') | chrome/browser/history/scored_history_match_unittest.cc » ('J')
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: chrome/browser/history/scored_history_match.cc

diff --git a/chrome/browser/history/scored_history_match.cc b/chrome/browser/history/scored_history_match.cc

index 1f1184c859273b31a94a901238661ce72bd018d8..136755142c98f3c6180e07c24bb70a88f05c5b7a 100644

--- a/chrome/browser/history/scored_history_match.cc

+++ b/chrome/browser/history/scored_history_match.cc

@@ -14,6 +14,8 @@

#include "base/logging.h"

#include "base/metrics/histogram.h"

+#include "base/strings/string_number_conversions.h"

+#include "base/strings/string_split.h"

#include "base/strings/string_util.h"

#include "base/strings/utf_string_conversions.h"

#include "chrome/browser/autocomplete/history_url_provider.h"

@@ -61,6 +63,9 @@ ScoredHistoryMatch::ScoredHistoryMatch(

can_inline_(false) {

Init();

+ // Initialize the HQP scoring params.

+ InitializeHQPExperimentalParams();

GURL gurl = row.url();

if (!gurl.is_valid())

return;

@@ -154,7 +159,8 @@ ScoredHistoryMatch::ScoredHistoryMatch(

terms.size(), url, terms_to_word_starts_offsets, word_starts);

const float frequency_score = GetFrequency(

now, (history_client && history_client->IsBookmarked(gurl)), visits);

- raw_score_ = GetFinalRelevancyScore(topicality_score, frequency_score);

+ raw_score_ = GetFinalRelevancyScore(topicality_score, frequency_score,

+ hqp_relevance_buckets_);

raw_score_ =

(raw_score_ <= kint32max) ? static_cast<int>(raw_score_) : kint32max;

@@ -438,7 +444,15 @@ float ScoredHistoryMatch::GetTopicalityScore(

// TODO(mpearson): If there are multiple terms, consider taking the

// geometric mean of per-term scores rather than the arithmetic mean.

- return topicality_score / num_terms;

+ float final_topicality_score = topicality_score / num_terms;

+ // Demote all the URLs if the topicality score is less than the threshold.

+ if ((hqp_experimental_scoring_enabled_) &&

Bart N. 2015/02/10 01:22:29 Not sure but I don't think we need () around singl

Ashok vardhan 2015/02/10 23:57:45 Done.

+ (final_topicality_score < topicality_threshold_)) {

+ return 0.0;

Bart N. 2015/02/10 01:22:29 Wrong indent.

Ashok vardhan 2015/02/10 23:57:45 Done.

+ }

+ return final_topicality_score;

}

// static

@@ -542,8 +556,10 @@ float ScoredHistoryMatch::GetFrequency(const base::Time& now,

}

// static

-float ScoredHistoryMatch::GetFinalRelevancyScore(float topicality_score,

- float frequency_score) {

+float ScoredHistoryMatch::GetFinalRelevancyScore(

+ float topicality_score,

+ float frequency_score,

+ std::string& hqp_relevance_buckets) {

if (topicality_score == 0)

return 0;

// Here's how to interpret intermediate_score: Suppose the omnibox

@@ -559,29 +575,84 @@ float ScoredHistoryMatch::GetFinalRelevancyScore(float topicality_score,

// - a typed visit once a week -> 11.77

// - a typed visit every three days -> 14.12

// - at least ten typed visits today -> 20.0 (maximum score)

- const float intermediate_score = topicality_score * frequency_score;

+ //

// The below code maps intermediate_score to the range [0, 1399].

+ // For example:

+ // HQP default scoring buckets: "1.5:600,12.0:1300,20.0:1399"

+ // We will linearly interpolate the scores between:

+ // 0 to 1.5 --> 400 to 600

+ // 1.5 to 12.0 --> 600 to 1300

+ // 12.0 to 20.0 --> 1300 to 1399

+ // >= 20.0 --> 1399

+ //

// The score maxes out at 1400 (i.e., cannot beat a good inline result).

- if (intermediate_score <= 1) {

- // Linearly extrapolate between 0 and 1.5 so 0 has a score of 400

- // and 1.5 has a score of 600.

- const float slope = (600 - 400) / (1.5f - 0.0f);

- return 400 + slope * intermediate_score;

+ //

+ // If experimental scoring is enabled, then the score buckets will be like:

+ // HQP experimental scoring buckets: "1.5:600,5.0:900,12.0:1100,20.0:1300"

+ const float intermediate_score = topicality_score * frequency_score;

+ double base_intermediate_score = 0.0;

+ int base_hqp_score = 400;

+ base::StringPairs kv_pairs;

+ if (base::SplitStringIntoKeyValuePairs(hqp_relevance_buckets,

Bart N. 2015/02/10 01:22:29 I don't think you want to parse it each time... I'

Mark P 2015/02/10 18:33:40 Indeed, this function likely gets called thousands

Ashok vardhan 2015/02/10 23:57:45 Completely agreed and didn't realise. Changed the

Ashok vardhan 2015/02/10 23:57:45 Done.

+ ':', ',', &kv_pairs)) {

+ double max_intermediate_score = base_intermediate_score;

+ int max_hqp_score = base_hqp_score;

+ for (base::StringPairs::const_iterator it = kv_pairs.begin();

+ it != kv_pairs.end(); ++it) {

+ base::StringToDouble(it->first, &max_intermediate_score);

+ base::StringToInt(it->second, &max_hqp_score);

+ if (intermediate_score <= max_intermediate_score) {

+ const float slope = (

+ (max_hqp_score - base_hqp_score) /

+ (max_intermediate_score - base_intermediate_score));

+ const int final_hqp_score = (base_hqp_score +

+ (slope * (intermediate_score -

+ base_intermediate_score)));

+ return std::min(final_hqp_score, max_hqp_score);

+ }

+ base_intermediate_score = max_intermediate_score;

+ base_hqp_score = max_hqp_score;

+ }

+ // It will reach this stage when the score is > highest bucket score.

+ // Return max_hqp_score.

+ return max_hqp_score;

}

- if (intermediate_score <= 12.0) {

- // Linearly extrapolate up to 12 so 12 has a score of 1300.

- const float slope = (1300 - 600) / (12.0f - 1.5f);

- return 600 + slope * (intermediate_score - 1.5);

+ // Return min hqp score.

+ return base_hqp_score;

+void ScoredHistoryMatch::InitializeHQPExperimentalParams() {

+ // Initialize the hqp experiment params.

+ hqp_experimental_scoring_enabled_ = false;

+ topicality_threshold_ = -1;

+ // These are default HQP scoring params.

+ // See GetFinalRelevancyScore() for details.

+ hqp_relevance_buckets_ = "1.5:600,12.0:1300,20.0:1399";

+ // Fetch the experiment params if there are any.

+ hqp_experimental_scoring_enabled_ =

+ OmniboxFieldTrial::HQPExperimentalScoringEnabled();

+ if (!hqp_experimental_scoring_enabled_)

+ return;

+ // Add the topicality threshold from experiment params.

+ float hqp_experimental_topicality_threhold =

+ OmniboxFieldTrial::HQPExperimentalTopicalityThreshold();

+ if (hqp_experimental_topicality_threhold > 0)

+ topicality_threshold_ = hqp_experimental_topicality_threhold;

+ // Add the HQP experimental scoring buckets.

+ std::string hqp_experimental_scoring_buckets =

+ OmniboxFieldTrial::HQPExperimentalScoringBuckets();

+ if (!hqp_experimental_scoring_buckets.empty()) {

+ hqp_relevance_buckets_ = hqp_experimental_scoring_buckets;

}

- // Linearly extrapolate so a score of 20 (or more) has a score of 1399.

- // (Scores above 20 are possible for URLs that have multiple term hits

- // in the URL and/or title and that are visited practically all

- // the time using typed visits. We don't attempt to distinguish

- // between these very good results.)

- const float slope = (1399 - 1300) / (20.0f - 12.0f);

- return std::min(1399.0, 1300 + slope * (intermediate_score - 12.0));

}

+// static

void ScoredHistoryMatch::Init() {

if (initialized_)

return;

@@ -602,6 +673,7 @@ void ScoredHistoryMatch::Init() {

bookmark_value_ = OmniboxFieldTrial::HQPBookmarkValue();

allow_tld_matches_ = OmniboxFieldTrial::HQPAllowMatchInTLDValue();

allow_scheme_matches_ = OmniboxFieldTrial::HQPAllowMatchInSchemeValue();

initialized_ = true;

}