Index: chrome/browser/history/scored_history_match.cc |
diff --git a/chrome/browser/history/scored_history_match.cc b/chrome/browser/history/scored_history_match.cc |
index 1f1184c859273b31a94a901238661ce72bd018d8..aa769d2f2eb60453a5b06d43561bd89f1d238097 100644 |
--- a/chrome/browser/history/scored_history_match.cc |
+++ b/chrome/browser/history/scored_history_match.cc |
@@ -14,6 +14,8 @@ |
#include "base/logging.h" |
#include "base/metrics/histogram.h" |
+#include "base/strings/string_number_conversions.h" |
+#include "base/strings/string_split.h" |
#include "base/strings/string_util.h" |
#include "base/strings/utf_string_conversions.h" |
#include "chrome/browser/autocomplete/history_url_provider.h" |
@@ -40,6 +42,11 @@ bool ScoredHistoryMatch::allow_scheme_matches_ = false; |
bool ScoredHistoryMatch::also_do_hup_like_scoring_ = false; |
int ScoredHistoryMatch::max_assigned_score_for_non_inlineable_matches_ = -1; |
+bool ScoredHistoryMatch::hqp_experimental_scoring_enabled_ = false; |
Mark P
2015/02/14 01:27:14
(1) These static initializers should be in the ord
Ashok vardhan
2015/02/17 01:23:52
Done.
|
+float ScoredHistoryMatch::topicality_threshold_ = -1; |
+std::vector<ScoredHistoryMatch::ScoreMaxRelevance>* |
+ ScoredHistoryMatch::hqp_relevance_buckets_ = NULL; |
+ |
ScoredHistoryMatch::ScoredHistoryMatch() |
: raw_score_(0), |
can_inline_(false) { |
@@ -154,7 +161,8 @@ ScoredHistoryMatch::ScoredHistoryMatch( |
terms.size(), url, terms_to_word_starts_offsets, word_starts); |
const float frequency_score = GetFrequency( |
now, (history_client && history_client->IsBookmarked(gurl)), visits); |
- raw_score_ = GetFinalRelevancyScore(topicality_score, frequency_score); |
+ raw_score_ = GetFinalRelevancyScore(topicality_score, frequency_score, |
+ *hqp_relevance_buckets_); |
raw_score_ = |
(raw_score_ <= kint32max) ? static_cast<int>(raw_score_) : kint32max; |
@@ -438,7 +446,15 @@ float ScoredHistoryMatch::GetTopicalityScore( |
// TODO(mpearson): If there are multiple terms, consider taking the |
// geometric mean of per-term scores rather than the arithmetic mean. |
- return topicality_score / num_terms; |
+ float final_topicality_score = topicality_score / num_terms; |
+ |
+ // Demote all the URLs if the topicality score is less than threshold. |
+ if (hqp_experimental_scoring_enabled_ && |
+ (final_topicality_score < topicality_threshold_)) { |
+ return 0.0; |
+ } |
+ |
+ return final_topicality_score; |
} |
// static |
@@ -542,8 +558,10 @@ float ScoredHistoryMatch::GetFrequency(const base::Time& now, |
} |
// static |
-float ScoredHistoryMatch::GetFinalRelevancyScore(float topicality_score, |
- float frequency_score) { |
+float ScoredHistoryMatch::GetFinalRelevancyScore( |
+ float topicality_score, |
+ float frequency_score, |
+ const std::vector<ScoreMaxRelevance>& hqp_relevance_buckets) { |
if (topicality_score == 0) |
return 0; |
// Here's how to interpret intermediate_score: Suppose the omnibox |
@@ -559,29 +577,91 @@ float ScoredHistoryMatch::GetFinalRelevancyScore(float topicality_score, |
// - a typed visit once a week -> 11.77 |
// - a typed visit every three days -> 14.12 |
// - at least ten typed visits today -> 20.0 (maximum score) |
- const float intermediate_score = topicality_score * frequency_score; |
+ // |
// The below code maps intermediate_score to the range [0, 1399]. |
+ // For example: |
+ // HQP default scoring buckets: "1.5:600,12.0:1300,20.0:1399" |
Mark P
2015/02/14 01:27:13
I think you'll want your format to require having
Ashok vardhan
2015/02/17 01:23:52
Make sense. Thought all the minimum scores for pro
|
+ // We will linearly interpolate the scores between: |
+ // 0 to 1.5 --> 400 to 600 |
+ // 1.5 to 12.0 --> 600 to 1300 |
+ // 12.0 to 20.0 --> 1300 to 1399 |
+ // >= 20.0 --> 1399 |
+ // |
// The score maxes out at 1400 (i.e., cannot beat a good inline result). |
Mark P
2015/02/14 01:27:14
1400 -> 1399
also, good inline result -> good inli
Ashok vardhan
2015/02/17 01:23:52
Done.
|
- if (intermediate_score <= 1) { |
- // Linearly extrapolate between 0 and 1.5 so 0 has a score of 400 |
- // and 1.5 has a score of 600. |
- const float slope = (600 - 400) / (1.5f - 0.0f); |
- return 400 + slope * intermediate_score; |
+ // |
+ // If experimental scoring is enabled, then the score buckets will be like: |
Mark P
2015/02/14 01:27:14
I don't know what line 592-593 are adding. Is it
Ashok vardhan
2015/02/17 01:23:52
Done.
|
+ // HQP experimental scoring buckets: "1.5:600,5.0:900,12.0:1100,20.0:1300" |
+ const float intermediate_score = topicality_score * frequency_score; |
+ |
+ double base_intermediate_score = 0.0; |
Mark P
2015/02/14 01:27:14
Please comment the variables in this block more or
Ashok vardhan
2015/02/17 01:23:52
Acknowledged.
|
+ int base_hqp_score = 400; |
+ double max_intermediate_score = base_intermediate_score; |
+ int max_hqp_score = base_hqp_score; |
+ |
+ // Find the threshold where intermediate score is greater than bucket. |
+ for (size_t i = 0; i < hqp_relevance_buckets.size(); ++i) { |
+ ScoreMaxRelevance hqp_bucket = hqp_relevance_buckets[i]; |
Mark P
2015/02/14 01:27:14
const &
Ashok vardhan
2015/02/17 01:23:52
Done.
|
+ max_intermediate_score = hqp_bucket.first; |
+ max_hqp_score = hqp_bucket.second; |
+ if (intermediate_score <= max_intermediate_score) { |
+ const float slope = ( |
+ (max_hqp_score - base_hqp_score) / |
+ (max_intermediate_score - base_intermediate_score)); |
+ const int final_hqp_score = (base_hqp_score + |
Mark P
2015/02/14 01:27:13
does the whole right side of the equals fit on one
Mark P
2015/02/14 01:27:14
This function returns a float. Don't force it to
Ashok vardhan
2015/02/17 01:23:52
Nope. And also i guess its easy to read if it is l
Ashok vardhan
2015/02/17 01:23:52
Done.
Ashok vardhan
2015/02/17 01:23:52
Done.
|
+ (slope * (intermediate_score - |
+ base_intermediate_score))); |
+ return std::min(final_hqp_score, max_hqp_score); |
Mark P
2015/02/14 01:27:13
Why is this min necessary?
Ashok vardhan
2015/02/17 01:23:52
Done.
|
+ } |
+ base_intermediate_score = max_intermediate_score; |
+ base_hqp_score = max_hqp_score; |
} |
- if (intermediate_score <= 12.0) { |
- // Linearly extrapolate up to 12 so 12 has a score of 1300. |
- const float slope = (1300 - 600) / (12.0f - 1.5f); |
- return 600 + slope * (intermediate_score - 1.5); |
+ // It will reach this stage when the score is > highest bucket score or |
+ // when buckets are not specified. Return max_hqp_score. |
Mark P
2015/02/14 01:27:14
Buckets should never be not specified. Correct th
Ashok vardhan
2015/02/17 01:23:52
Done.
|
+ return max_hqp_score; |
+} |
+ |
+// static |
+void ScoredHistoryMatch::InitializeHQPExperimentalParams() { |
+ // These are default HQP relevance scoring buckets. |
+ // See GetFinalRelevancyScore() for details. |
+ std::string hqp_relevance_buckets_str = "1.5:600,12.0:1300,20.0:1399"; |
+ |
+ // Fetch the experiment params if they are any. |
+ hqp_experimental_scoring_enabled_ = |
+ OmniboxFieldTrial::HQPExperimentalScoringEnabled(); |
+ |
+ if (hqp_experimental_scoring_enabled_) { |
+ // Add the topicality threshold from experiment params. |
+ float hqp_experimental_topicality_threhold = |
+ OmniboxFieldTrial::HQPExperimentalTopicalityThreshold(); |
+ if (hqp_experimental_topicality_threhold > 0) |
Mark P
2015/02/14 01:27:14
Why have a >0 test? If you're checking for unspec
Ashok vardhan
2015/02/17 01:23:52
Done.
|
+ topicality_threshold_ = hqp_experimental_topicality_threhold; |
+ |
+ // Add the HQP experimental scoring buckets. |
+ std::string hqp_experimental_scoring_buckets = |
+ OmniboxFieldTrial::HQPExperimentalScoringBuckets(); |
+ if (!hqp_experimental_scoring_buckets.empty()) |
+ hqp_relevance_buckets_str = hqp_experimental_scoring_buckets; |
+ } |
+ |
+ // Parse the hqp_relevance_buckets_str string once and store them in vector |
+ // which is easy to access. |
+ hqp_relevance_buckets_ = |
+ new std::vector<ScoredHistoryMatch::ScoreMaxRelevance>(); |
+ base::StringPairs kv_pairs; |
+ if (base::SplitStringIntoKeyValuePairs(hqp_relevance_buckets_str, |
+ ':', ',', &kv_pairs)) { |
+ for (base::StringPairs::const_iterator it = kv_pairs.begin(); |
+ it != kv_pairs.end(); ++it) { |
+ ScoreMaxRelevance bucket; |
+ base::StringToDouble(it->first, &bucket.first); |
Mark P
2015/02/14 01:27:14
minor nit: consider DCHECKING the return value for
Ashok vardhan
2015/02/17 01:23:52
DCHECK seems to be not working here.
std::strin
|
+ base::StringToInt(it->second, &bucket.second); |
+ hqp_relevance_buckets_->push_back(bucket); |
+ } |
} |
- // Linearly extrapolate so a score of 20 (or more) has a score of 1399. |
- // (Scores above 20 are possible for URLs that have multiple term hits |
- // in the URL and/or title and that are visited practically all |
- // the time using typed visits. We don't attempt to distinguish |
- // between these very good results.) |
- const float slope = (1399 - 1300) / (20.0f - 12.0f); |
- return std::min(1399.0, 1300 + slope * (intermediate_score - 12.0)); |
} |
+// static |
void ScoredHistoryMatch::Init() { |
if (initialized_) |
return; |
@@ -602,6 +682,10 @@ void ScoredHistoryMatch::Init() { |
bookmark_value_ = OmniboxFieldTrial::HQPBookmarkValue(); |
allow_tld_matches_ = OmniboxFieldTrial::HQPAllowMatchInTLDValue(); |
allow_scheme_matches_ = OmniboxFieldTrial::HQPAllowMatchInSchemeValue(); |
+ |
+ // Initialize the HQP Experimental scoring params. |
+ InitializeHQPExperimentalParams(); |
+ |
initialized_ = true; |
} |