chrome/browser/history/scored_history_match.cc - Issue 905023003: Adding knobs on HQP provider.

Unified Diff: chrome/browser/history/scored_history_match.cc

Issue 905023003: Adding knobs on HQP provider. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Addressing mark comments. Created 5 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« chrome/browser/history/scored_history_match.h ('K') | « chrome/browser/history/scored_history_match.h ('k') | chrome/browser/history/scored_history_match_unittest.cc » ('j') | components/omnibox/omnibox_field_trial.h » ('J')
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: chrome/browser/history/scored_history_match.cc

diff --git a/chrome/browser/history/scored_history_match.cc b/chrome/browser/history/scored_history_match.cc

index 1f1184c859273b31a94a901238661ce72bd018d8..aa769d2f2eb60453a5b06d43561bd89f1d238097 100644

--- a/chrome/browser/history/scored_history_match.cc

+++ b/chrome/browser/history/scored_history_match.cc

@@ -14,6 +14,8 @@

#include "base/logging.h"

#include "base/metrics/histogram.h"

+#include "base/strings/string_number_conversions.h"

+#include "base/strings/string_split.h"

#include "base/strings/string_util.h"

#include "base/strings/utf_string_conversions.h"

#include "chrome/browser/autocomplete/history_url_provider.h"

@@ -40,6 +42,11 @@ bool ScoredHistoryMatch::allow_scheme_matches_ = false;

bool ScoredHistoryMatch::also_do_hup_like_scoring_ = false;

int ScoredHistoryMatch::max_assigned_score_for_non_inlineable_matches_ = -1;

+bool ScoredHistoryMatch::hqp_experimental_scoring_enabled_ = false;

Mark P 2015/02/14 01:27:14 (1) These static initializers should be in the ord

Ashok vardhan 2015/02/17 01:23:52 Done.

+float ScoredHistoryMatch::topicality_threshold_ = -1;

+std::vector<ScoredHistoryMatch::ScoreMaxRelevance>*

+ ScoredHistoryMatch::hqp_relevance_buckets_ = NULL;

ScoredHistoryMatch::ScoredHistoryMatch()

: raw_score_(0),

can_inline_(false) {

@@ -154,7 +161,8 @@ ScoredHistoryMatch::ScoredHistoryMatch(

terms.size(), url, terms_to_word_starts_offsets, word_starts);

const float frequency_score = GetFrequency(

now, (history_client && history_client->IsBookmarked(gurl)), visits);

- raw_score_ = GetFinalRelevancyScore(topicality_score, frequency_score);

+ raw_score_ = GetFinalRelevancyScore(topicality_score, frequency_score,

+ *hqp_relevance_buckets_);

raw_score_ =

(raw_score_ <= kint32max) ? static_cast<int>(raw_score_) : kint32max;

@@ -438,7 +446,15 @@ float ScoredHistoryMatch::GetTopicalityScore(

// TODO(mpearson): If there are multiple terms, consider taking the

// geometric mean of per-term scores rather than the arithmetic mean.

- return topicality_score / num_terms;

+ float final_topicality_score = topicality_score / num_terms;

+ // Demote all the URLs if the topicality score is less than threshold.

+ if (hqp_experimental_scoring_enabled_ &&

+ (final_topicality_score < topicality_threshold_)) {

+ return 0.0;

+ }

+ return final_topicality_score;

}

// static

@@ -542,8 +558,10 @@ float ScoredHistoryMatch::GetFrequency(const base::Time& now,

}

// static

-float ScoredHistoryMatch::GetFinalRelevancyScore(float topicality_score,

- float frequency_score) {

+float ScoredHistoryMatch::GetFinalRelevancyScore(

+ float topicality_score,

+ float frequency_score,

+ const std::vector<ScoreMaxRelevance>& hqp_relevance_buckets) {

if (topicality_score == 0)

return 0;

// Here's how to interpret intermediate_score: Suppose the omnibox

@@ -559,29 +577,91 @@ float ScoredHistoryMatch::GetFinalRelevancyScore(float topicality_score,

// - a typed visit once a week -> 11.77

// - a typed visit every three days -> 14.12

// - at least ten typed visits today -> 20.0 (maximum score)

- const float intermediate_score = topicality_score * frequency_score;

+ //

// The below code maps intermediate_score to the range [0, 1399].

+ // For example:

+ // HQP default scoring buckets: "1.5:600,12.0:1300,20.0:1399"

Mark P 2015/02/14 01:27:13 I think you'll want your format to require having

Ashok vardhan 2015/02/17 01:23:52 Make sense. Thought all the minimum scores for pro

+ // We will linearly interpolate the scores between:

+ // 0 to 1.5 --> 400 to 600

+ // 1.5 to 12.0 --> 600 to 1300

+ // 12.0 to 20.0 --> 1300 to 1399

+ // >= 20.0 --> 1399

+ //

// The score maxes out at 1399 (i.e., cannot beat a good inline result).

Mark P 2015/02/14 01:27:14 1400 -> 1399 also, good inline result -> good inli

Ashok vardhan 2015/02/17 01:23:52 Done.

- if (intermediate_score <= 1) {

- // Linearly extrapolate between 0 and 1.5 so 0 has a score of 400

- // and 1.5 has a score of 600.

- const float slope = (600 - 400) / (1.5f - 0.0f);

- return 400 + slope * intermediate_score;

+ //

+ // If experimental scoring is enabled, then the score buckets will be like:

Mark P 2015/02/14 01:27:14 I don't know what line 592-593 are adding. Is it

Ashok vardhan 2015/02/17 01:23:52 Done.

+ // HQP experimental scoring buckets: "1.5:600,5.0:900,12.0:1100,20.0:1300"

+ const float intermediate_score = topicality_score * frequency_score;

+ double base_intermediate_score = 0.0;

Mark P 2015/02/14 01:27:14 Please comment the variables in this block more or

Ashok vardhan 2015/02/17 01:23:52 Acknowledged.

+ int base_hqp_score = 400;

+ double max_intermediate_score = base_intermediate_score;

+ int max_hqp_score = base_hqp_score;

+ // Find the first bucket whose upper bound is >= intermediate_score.

+ for (size_t i = 0; i < hqp_relevance_buckets.size(); ++i) {

+ ScoreMaxRelevance hqp_bucket = hqp_relevance_buckets[i];

Mark P 2015/02/14 01:27:14 const &

Ashok vardhan 2015/02/17 01:23:52 Done.

+ max_intermediate_score = hqp_bucket.first;

+ max_hqp_score = hqp_bucket.second;

+ if (intermediate_score <= max_intermediate_score) {

+ const float slope = (

+ (max_hqp_score - base_hqp_score) /

+ (max_intermediate_score - base_intermediate_score));

+ const int final_hqp_score = (base_hqp_score +

Mark P 2015/02/14 01:27:13 does the whole right side of the equals fit on one

Mark P 2015/02/14 01:27:14 This function returns a float. Don't force it to

Ashok vardhan 2015/02/17 01:23:52 Nope. And also i guess its easy to read if it is l

Ashok vardhan 2015/02/17 01:23:52 Done.

+ (slope * (intermediate_score -

+ base_intermediate_score)));

+ return std::min(final_hqp_score, max_hqp_score);

Mark P 2015/02/14 01:27:13 Why is this min necessary?

Ashok vardhan 2015/02/17 01:23:52 Done.

+ }

+ base_intermediate_score = max_intermediate_score;

+ base_hqp_score = max_hqp_score;

}

- if (intermediate_score <= 12.0) {

- // Linearly extrapolate up to 12 so 12 has a score of 1300.

- const float slope = (1300 - 600) / (12.0f - 1.5f);

- return 600 + slope * (intermediate_score - 1.5);

+ // It will reach this stage when the score is > highest bucket score or

+ // when buckets are not specified. Return max_hqp_score.

Mark P 2015/02/14 01:27:14 Buckets should never be not specified. Correct th

Ashok vardhan 2015/02/17 01:23:52 Done.

+ return max_hqp_score;

+// static

+void ScoredHistoryMatch::InitializeHQPExperimentalParams() {

+ // These are default HQP relevance scoring buckets.

+ // See GetFinalRelevancyScore() for details.

+ std::string hqp_relevance_buckets_str = "1.5:600,12.0:1300,20.0:1399";

+ // Fetch the experiment params if there are any.

+ hqp_experimental_scoring_enabled_ =

+ OmniboxFieldTrial::HQPExperimentalScoringEnabled();

+ if (hqp_experimental_scoring_enabled_) {

+ // Add the topicality threshold from experiment params.

+ float hqp_experimental_topicality_threhold =

+ OmniboxFieldTrial::HQPExperimentalTopicalityThreshold();

+ if (hqp_experimental_topicality_threhold > 0)

Mark P 2015/02/14 01:27:14 Why have a >0 test? If you're checking for unspec

Ashok vardhan 2015/02/17 01:23:52 Done.

+ topicality_threshold_ = hqp_experimental_topicality_threhold;

+ // Add the HQP experimental scoring buckets.

+ std::string hqp_experimental_scoring_buckets =

+ OmniboxFieldTrial::HQPExperimentalScoringBuckets();

+ if (!hqp_experimental_scoring_buckets.empty())

+ hqp_relevance_buckets_str = hqp_experimental_scoring_buckets;

+ }

+ // Parse hqp_relevance_buckets_str once and store the buckets in a vector,

+ // which is easy to access.

+ hqp_relevance_buckets_ =

+ new std::vector<ScoredHistoryMatch::ScoreMaxRelevance>();

+ base::StringPairs kv_pairs;

+ if (base::SplitStringIntoKeyValuePairs(hqp_relevance_buckets_str,

+ ':', ',', &kv_pairs)) {

+ for (base::StringPairs::const_iterator it = kv_pairs.begin();

+ it != kv_pairs.end(); ++it) {

+ ScoreMaxRelevance bucket;

+ base::StringToDouble(it->first, &bucket.first);

Mark P 2015/02/14 01:27:14 minor nit: consider DCHECKING the return value for

Ashok vardhan 2015/02/17 01:23:52 DCHECK seems to be not working here. std::strin

+ base::StringToInt(it->second, &bucket.second);

+ hqp_relevance_buckets_->push_back(bucket);

+ }

}

- // Linearly extrapolate so a score of 20 (or more) has a score of 1399.

- // (Scores above 20 are possible for URLs that have multiple term hits

- // in the URL and/or title and that are visited practically all

- // the time using typed visits. We don't attempt to distinguish

- // between these very good results.)

- const float slope = (1399 - 1300) / (20.0f - 12.0f);

- return std::min(1399.0, 1300 + slope * (intermediate_score - 12.0));

}

+// static

void ScoredHistoryMatch::Init() {

if (initialized_)

return;

@@ -602,6 +682,10 @@ void ScoredHistoryMatch::Init() {

bookmark_value_ = OmniboxFieldTrial::HQPBookmarkValue();

allow_tld_matches_ = OmniboxFieldTrial::HQPAllowMatchInTLDValue();

allow_scheme_matches_ = OmniboxFieldTrial::HQPAllowMatchInSchemeValue();

+ // Initialize the HQP Experimental scoring params.

+ InitializeHQPExperimentalParams();

initialized_ = true;

}