Chromium Code Reviews| Index: chrome/browser/history/scored_history_match.cc |
| diff --git a/chrome/browser/history/scored_history_match.cc b/chrome/browser/history/scored_history_match.cc |
| index 1f1184c859273b31a94a901238661ce72bd018d8..1b9a33a8963361b18370c8067b9a724fd09a419e 100644 |
| --- a/chrome/browser/history/scored_history_match.cc |
| +++ b/chrome/browser/history/scored_history_match.cc |
| @@ -14,6 +14,8 @@ |
| #include "base/logging.h" |
| #include "base/metrics/histogram.h" |
| +#include "base/strings/string_number_conversions.h" |
| +#include "base/strings/string_split.h" |
| #include "base/strings/string_util.h" |
| #include "base/strings/utf_string_conversions.h" |
| #include "chrome/browser/autocomplete/history_url_provider.h" |
| @@ -61,6 +63,9 @@ ScoredHistoryMatch::ScoredHistoryMatch( |
| can_inline_(false) { |
| Init(); |
| + // Initialize the HQP scoring params. |
| + InitializeHQPExperimentalParams(); |
|
Mark P
2015/02/11 21:57:29
This new location for initializing the parameters
Ashok vardhan
2015/02/12 19:56:31
I see. Got your point. Thought my parsing was the
|
| + |
| GURL gurl = row.url(); |
| if (!gurl.is_valid()) |
| return; |
| @@ -154,7 +159,8 @@ ScoredHistoryMatch::ScoredHistoryMatch( |
| terms.size(), url, terms_to_word_starts_offsets, word_starts); |
| const float frequency_score = GetFrequency( |
| now, (history_client && history_client->IsBookmarked(gurl)), visits); |
| - raw_score_ = GetFinalRelevancyScore(topicality_score, frequency_score); |
| + raw_score_ = GetFinalRelevancyScore(topicality_score, frequency_score, |
| + hqp_relevance_buckets_); |
| raw_score_ = |
| (raw_score_ <= kint32max) ? static_cast<int>(raw_score_) : kint32max; |
| @@ -438,7 +444,15 @@ float ScoredHistoryMatch::GetTopicalityScore( |
| // TODO(mpearson): If there are multiple terms, consider taking the |
| // geometric mean of per-term scores rather than the arithmetic mean. |
| - return topicality_score / num_terms; |
| + float final_topicality_score = topicality_score / num_terms; |
| + |
| + // When experimental scoring is enabled, demote the URL by returning a |
| + // topicality score of 0 if its score is below the threshold. |
| + if (hqp_experimental_scoring_enabled_ && |
| + (final_topicality_score < topicality_threshold_)) { |
| + return 0.0; |
| + } |
| + |
| + return final_topicality_score; |
| } |
| // static |
| @@ -542,8 +556,10 @@ float ScoredHistoryMatch::GetFrequency(const base::Time& now, |
| } |
| // static |
| -float ScoredHistoryMatch::GetFinalRelevancyScore(float topicality_score, |
| - float frequency_score) { |
| +float ScoredHistoryMatch::GetFinalRelevancyScore( |
| + float topicality_score, |
| + float frequency_score, |
| + std::vector<ScoreMaxRelevance>& hqp_relevance_buckets) { |
| if (topicality_score == 0) |
| return 0; |
| // Here's how to interpret intermediate_score: Suppose the omnibox |
| @@ -559,29 +575,91 @@ float ScoredHistoryMatch::GetFinalRelevancyScore(float topicality_score, |
| // - a typed visit once a week -> 11.77 |
| // - a typed visit every three days -> 14.12 |
| // - at least ten typed visits today -> 20.0 (maximum score) |
| - const float intermediate_score = topicality_score * frequency_score; |
| + // |
| // The below code maps intermediate_score to the range [0, 1399]. |
| + // For example: |
| + // HQP default scoring buckets: "1.5:600,12.0:1300,20.0:1399" |
| + // We will linearly interpolate the scores between: |
| + // 0 to 1.5 --> 400 to 600 |
| + // 1.5 to 12.0 --> 600 to 1300 |
| + // 12.0 to 20.0 --> 1300 to 1399 |
| + // >= 20.0 --> 1399 |
| + // |
| // The score maxes out at 1400 (i.e., cannot beat a good inline result). |
| - if (intermediate_score <= 1) { |
| - // Linearly extrapolate between 0 and 1.5 so 0 has a score of 400 |
| - // and 1.5 has a score of 600. |
| - const float slope = (600 - 400) / (1.5f - 0.0f); |
| - return 400 + slope * intermediate_score; |
| + // |
| + // If experimental scoring is enabled, then the score buckets will be like: |
| + // HQP experimental scoring buckets: "1.5:600,5.0:900,12.0:1100,20.0:1300" |
| + const float intermediate_score = topicality_score * frequency_score; |
| + |
| + double base_intermediate_score = 0.0; |
| + int base_hqp_score = 400; |
| + double max_intermediate_score = base_intermediate_score; |
| + int max_hqp_score = base_hqp_score; |
| + |
| + // Find the first bucket whose upper bound is >= intermediate_score and |
| + // linearly interpolate between the previous bucket and this one. |
| + for (size_t i = 0; i < hqp_relevance_buckets.size(); ++i) { |
| + ScoreMaxRelevance hqp_bucket = hqp_relevance_buckets[i]; |
| + max_intermediate_score = hqp_bucket.first; |
| + max_hqp_score = hqp_bucket.second; |
| + if (intermediate_score <= max_intermediate_score) { |
| + const float slope = ( |
| + (max_hqp_score - base_hqp_score) / |
| + (max_intermediate_score - base_intermediate_score)); |
| + const int final_hqp_score = (base_hqp_score + |
| + (slope * (intermediate_score - |
| + base_intermediate_score))); |
| + return std::min(final_hqp_score, max_hqp_score); |
| + } |
| + base_intermediate_score = max_intermediate_score; |
| + base_hqp_score = max_hqp_score; |
| } |
| - if (intermediate_score <= 12.0) { |
| - // Linearly extrapolate up to 12 so 12 has a score of 1300. |
| - const float slope = (1300 - 600) / (12.0f - 1.5f); |
| - return 600 + slope * (intermediate_score - 1.5); |
| + // It will reach this stage when the score is > highest bucket score or |
| + // when buckets are not specified. Return max_hqp_score. |
| + return max_hqp_score; |
| +} |
| + |
| +void ScoredHistoryMatch::InitializeHQPExperimentalParams() { |
| + // Initialize the hqp experiment params. |
| + hqp_experimental_scoring_enabled_ = false; |
| + topicality_threshold_ = -1; |
| + // These are default HQP scoring params. |
| + // See GetFinalRelevancyScore() for details. |
| + std::string hqp_relevance_buckets_str = "1.5:600,12.0:1300,20.0:1399"; |
| + |
| + // Fetch the experiment params if there are any. |
| + hqp_experimental_scoring_enabled_ = |
| + OmniboxFieldTrial::HQPExperimentalScoringEnabled(); |
| + |
| + if (hqp_experimental_scoring_enabled_) { |
| + // Add the topicality threshold from experiment params. |
| + float hqp_experimental_topicality_threhold = |
| + OmniboxFieldTrial::HQPExperimentalTopicalityThreshold(); |
| + if (hqp_experimental_topicality_threhold > 0) |
| + topicality_threshold_ = hqp_experimental_topicality_threhold; |
| + |
| + // Add the HQP experimental scoring buckets. |
| + std::string hqp_experimental_scoring_buckets = |
| + OmniboxFieldTrial::HQPExperimentalScoringBuckets(); |
| + if (!hqp_experimental_scoring_buckets.empty()) |
| + hqp_relevance_buckets_str = hqp_experimental_scoring_buckets; |
| + } |
| + |
| + // Parse the hqp_relevance_buckets_str string once and store the resulting |
| + // buckets in a vector, which is easy to access. |
| + base::StringPairs kv_pairs; |
| + if (base::SplitStringIntoKeyValuePairs(hqp_relevance_buckets_str, |
| + ':', ',', &kv_pairs)) { |
| + for (base::StringPairs::const_iterator it = kv_pairs.begin(); |
| + it != kv_pairs.end(); ++it) { |
| + ScoreMaxRelevance bucket; |
| + base::StringToDouble(it->first, &bucket.first); |
| + base::StringToInt(it->second, &bucket.second); |
| + hqp_relevance_buckets_.push_back(bucket); |
| + } |
| } |
| - // Linearly extrapolate so a score of 20 (or more) has a score of 1399. |
| - // (Scores above 20 are possible for URLs that have multiple term hits |
| - // in the URL and/or title and that are visited practically all |
| - // the time using typed visits. We don't attempt to distinguish |
| - // between these very good results.) |
| - const float slope = (1399 - 1300) / (20.0f - 12.0f); |
| - return std::min(1399.0, 1300 + slope * (intermediate_score - 12.0)); |
| } |
| +// static |
| void ScoredHistoryMatch::Init() { |
| if (initialized_) |
| return; |
| @@ -602,6 +680,7 @@ void ScoredHistoryMatch::Init() { |
| bookmark_value_ = OmniboxFieldTrial::HQPBookmarkValue(); |
| allow_tld_matches_ = OmniboxFieldTrial::HQPAllowMatchInTLDValue(); |
| allow_scheme_matches_ = OmniboxFieldTrial::HQPAllowMatchInSchemeValue(); |
| + |
| initialized_ = true; |
| } |