Chromium Code Reviews| Index: chrome/browser/history/scored_history_match.cc |
| diff --git a/chrome/browser/history/scored_history_match.cc b/chrome/browser/history/scored_history_match.cc |
| index 1f1184c859273b31a94a901238661ce72bd018d8..aa769d2f2eb60453a5b06d43561bd89f1d238097 100644 |
| --- a/chrome/browser/history/scored_history_match.cc |
| +++ b/chrome/browser/history/scored_history_match.cc |
| @@ -14,6 +14,8 @@ |
| #include "base/logging.h" |
| #include "base/metrics/histogram.h" |
| +#include "base/strings/string_number_conversions.h" |
| +#include "base/strings/string_split.h" |
| #include "base/strings/string_util.h" |
| #include "base/strings/utf_string_conversions.h" |
| #include "chrome/browser/autocomplete/history_url_provider.h" |
| @@ -40,6 +42,11 @@ bool ScoredHistoryMatch::allow_scheme_matches_ = false; |
| bool ScoredHistoryMatch::also_do_hup_like_scoring_ = false; |
| int ScoredHistoryMatch::max_assigned_score_for_non_inlineable_matches_ = -1; |
| +bool ScoredHistoryMatch::hqp_experimental_scoring_enabled_ = false; |
|
Mark P
2015/02/14 01:27:14
(1) These static initializers should be in the ord
Ashok vardhan
2015/02/17 01:23:52
Done.
|
| +float ScoredHistoryMatch::topicality_threshold_ = -1; |
| +std::vector<ScoredHistoryMatch::ScoreMaxRelevance>* |
| + ScoredHistoryMatch::hqp_relevance_buckets_ = NULL; |
| + |
| ScoredHistoryMatch::ScoredHistoryMatch() |
| : raw_score_(0), |
| can_inline_(false) { |
| @@ -154,7 +161,8 @@ ScoredHistoryMatch::ScoredHistoryMatch( |
| terms.size(), url, terms_to_word_starts_offsets, word_starts); |
| const float frequency_score = GetFrequency( |
| now, (history_client && history_client->IsBookmarked(gurl)), visits); |
| - raw_score_ = GetFinalRelevancyScore(topicality_score, frequency_score); |
| + raw_score_ = GetFinalRelevancyScore(topicality_score, frequency_score, |
| + *hqp_relevance_buckets_); |
| raw_score_ = |
| (raw_score_ <= kint32max) ? static_cast<int>(raw_score_) : kint32max; |
| @@ -438,7 +446,15 @@ float ScoredHistoryMatch::GetTopicalityScore( |
| // TODO(mpearson): If there are multiple terms, consider taking the |
| // geometric mean of per-term scores rather than the arithmetic mean. |
| - return topicality_score / num_terms; |
| + float final_topicality_score = topicality_score / num_terms; |
| + |
| + // Demote all the URLs if the topicality score is less than threshold. |
| + if (hqp_experimental_scoring_enabled_ && |
| + (final_topicality_score < topicality_threshold_)) { |
| + return 0.0; |
| + } |
| + |
| + return final_topicality_score; |
| } |
| // static |
| @@ -542,8 +558,10 @@ float ScoredHistoryMatch::GetFrequency(const base::Time& now, |
| } |
| // static |
| -float ScoredHistoryMatch::GetFinalRelevancyScore(float topicality_score, |
| - float frequency_score) { |
| +float ScoredHistoryMatch::GetFinalRelevancyScore( |
| + float topicality_score, |
| + float frequency_score, |
| + const std::vector<ScoreMaxRelevance>& hqp_relevance_buckets) { |
| if (topicality_score == 0) |
| return 0; |
| // Here's how to interpret intermediate_score: Suppose the omnibox |
| @@ -559,29 +577,91 @@ float ScoredHistoryMatch::GetFinalRelevancyScore(float topicality_score, |
| // - a typed visit once a week -> 11.77 |
| // - a typed visit every three days -> 14.12 |
| // - at least ten typed visits today -> 20.0 (maximum score) |
| - const float intermediate_score = topicality_score * frequency_score; |
| + // |
| // The below code maps intermediate_score to the range [0, 1399]. |
| + // For example: |
| + // HQP default scoring buckets: "1.5:600,12.0:1300,20.0:1399" |
|
Mark P
2015/02/14 01:27:13
I think you'll want your format to require having
Ashok vardhan
2015/02/17 01:23:52
Make sense. Thought all the minimum scores for pro
|
| + // We will linearly interpolate the scores between: |
| + // 0 to 1.5 --> 400 to 600 |
| + // 1.5 to 12.0 --> 600 to 1300 |
| + // 12.0 to 20.0 --> 1300 to 1399 |
| + // >= 20.0 --> 1399 |
| + // |
| // The score maxes out at 1400 (i.e., cannot beat a good inline result). |
|
Mark P
2015/02/14 01:27:14
1400 -> 1399
also, good inline result -> good inli
Ashok vardhan
2015/02/17 01:23:52
Done.
|
| - if (intermediate_score <= 1) { |
| - // Linearly extrapolate between 0 and 1.5 so 0 has a score of 400 |
| - // and 1.5 has a score of 600. |
| - const float slope = (600 - 400) / (1.5f - 0.0f); |
| - return 400 + slope * intermediate_score; |
| + // |
| + // If experimental scoring is enabled, then the score buckets will be like: |
|
Mark P
2015/02/14 01:27:14
I don't know what line 592-593 are adding. Is it
Ashok vardhan
2015/02/17 01:23:52
Done.
|
| + // HQP experimental scoring buckets: "1.5:600,5.0:900,12.0:1100,20.0:1300" |
| + const float intermediate_score = topicality_score * frequency_score; |
| + |
| + double base_intermediate_score = 0.0; |
|
Mark P
2015/02/14 01:27:14
Please comment the variables in this block more or
Ashok vardhan
2015/02/17 01:23:52
Acknowledged.
|
| + int base_hqp_score = 400; |
| + double max_intermediate_score = base_intermediate_score; |
| + int max_hqp_score = base_hqp_score; |
| + |
| + // Find the threshold where intermediate score is greater than bucket. |
| + for (size_t i = 0; i < hqp_relevance_buckets.size(); ++i) { |
| + ScoreMaxRelevance hqp_bucket = hqp_relevance_buckets[i]; |
|
Mark P
2015/02/14 01:27:14
const &
Ashok vardhan
2015/02/17 01:23:52
Done.
|
| + max_intermediate_score = hqp_bucket.first; |
| + max_hqp_score = hqp_bucket.second; |
| + if (intermediate_score <= max_intermediate_score) { |
| + const float slope = ( |
| + (max_hqp_score - base_hqp_score) / |
| + (max_intermediate_score - base_intermediate_score)); |
| + const int final_hqp_score = (base_hqp_score + |
|
Mark P
2015/02/14 01:27:13
does the whole right side of the equals fit on one
Mark P
2015/02/14 01:27:14
This function returns a float. Don't force it to
Ashok vardhan
2015/02/17 01:23:52
Nope. And also i guess its easy to read if it is l
Ashok vardhan
2015/02/17 01:23:52
Done.
Ashok vardhan
2015/02/17 01:23:52
Done.
|
| + (slope * (intermediate_score - |
| + base_intermediate_score))); |
| + return std::min(final_hqp_score, max_hqp_score); |
|
Mark P
2015/02/14 01:27:13
Why is this min necessary?
Ashok vardhan
2015/02/17 01:23:52
Done.
|
| + } |
| + base_intermediate_score = max_intermediate_score; |
| + base_hqp_score = max_hqp_score; |
| } |
| - if (intermediate_score <= 12.0) { |
| - // Linearly extrapolate up to 12 so 12 has a score of 1300. |
| - const float slope = (1300 - 600) / (12.0f - 1.5f); |
| - return 600 + slope * (intermediate_score - 1.5); |
| + // It will reach this stage when the score is > highest bucket score or |
| + // when buckets are not specified. Return max_hqp_score. |
|
Mark P
2015/02/14 01:27:14
Buckets should never be not specified. Correct th
Ashok vardhan
2015/02/17 01:23:52
Done.
|
| + return max_hqp_score; |
| +} |
| + |
| +// static |
| +void ScoredHistoryMatch::InitializeHQPExperimentalParams() { |
| + // These are default HQP relevance scoring buckets. |
| + // See GetFinalRelevancyScore() for details. |
| + std::string hqp_relevance_buckets_str = "1.5:600,12.0:1300,20.0:1399"; |
| + |
| + // Fetch the experiment params if they are any. |
| + hqp_experimental_scoring_enabled_ = |
| + OmniboxFieldTrial::HQPExperimentalScoringEnabled(); |
| + |
| + if (hqp_experimental_scoring_enabled_) { |
| + // Add the topicality threshold from experiment params. |
| + float hqp_experimental_topicality_threhold = |
| + OmniboxFieldTrial::HQPExperimentalTopicalityThreshold(); |
| + if (hqp_experimental_topicality_threhold > 0) |
|
Mark P
2015/02/14 01:27:14
Why have a >0 test? If you're checking for unspec
Ashok vardhan
2015/02/17 01:23:52
Done.
|
| + topicality_threshold_ = hqp_experimental_topicality_threhold; |
| + |
| + // Add the HQP experimental scoring buckets. |
| + std::string hqp_experimental_scoring_buckets = |
| + OmniboxFieldTrial::HQPExperimentalScoringBuckets(); |
| + if (!hqp_experimental_scoring_buckets.empty()) |
| + hqp_relevance_buckets_str = hqp_experimental_scoring_buckets; |
| + } |
| + |
| + // Parse the hqp_relevance_buckets_str string once and store them in vector |
| + // which is easy to access. |
| + hqp_relevance_buckets_ = |
| + new std::vector<ScoredHistoryMatch::ScoreMaxRelevance>(); |
| + base::StringPairs kv_pairs; |
| + if (base::SplitStringIntoKeyValuePairs(hqp_relevance_buckets_str, |
| + ':', ',', &kv_pairs)) { |
| + for (base::StringPairs::const_iterator it = kv_pairs.begin(); |
| + it != kv_pairs.end(); ++it) { |
| + ScoreMaxRelevance bucket; |
| + base::StringToDouble(it->first, &bucket.first); |
|
Mark P
2015/02/14 01:27:14
minor nit: consider DCHECKING the return value for
Ashok vardhan
2015/02/17 01:23:52
DCHECK seems to be not working here.
std::strin
|
| + base::StringToInt(it->second, &bucket.second); |
| + hqp_relevance_buckets_->push_back(bucket); |
| + } |
| } |
| - // Linearly extrapolate so a score of 20 (or more) has a score of 1399. |
| - // (Scores above 20 are possible for URLs that have multiple term hits |
| - // in the URL and/or title and that are visited practically all |
| - // the time using typed visits. We don't attempt to distinguish |
| - // between these very good results.) |
| - const float slope = (1399 - 1300) / (20.0f - 12.0f); |
| - return std::min(1399.0, 1300 + slope * (intermediate_score - 12.0)); |
| } |
| +// static |
| void ScoredHistoryMatch::Init() { |
| if (initialized_) |
| return; |
| @@ -602,6 +682,10 @@ void ScoredHistoryMatch::Init() { |
| bookmark_value_ = OmniboxFieldTrial::HQPBookmarkValue(); |
| allow_tld_matches_ = OmniboxFieldTrial::HQPAllowMatchInTLDValue(); |
| allow_scheme_matches_ = OmniboxFieldTrial::HQPAllowMatchInSchemeValue(); |
| + |
| + // Initialize the HQP Experimental scoring params. |
| + InitializeHQPExperimentalParams(); |
| + |
| initialized_ = true; |
| } |