Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(264)

Side by Side Diff: components/omnibox/browser/scored_history_match.cc

Issue 2541143002: Omnibox - Boost Frequency Scores Based on Number of Matching Pages (Closed)
Patch Set: improved comments and formatting Created 4 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/omnibox/browser/scored_history_match.h" 5 #include "components/omnibox/browser/scored_history_match.h"
6 6
7 #include <math.h> 7 #include <math.h>
8 8
9 #include <algorithm> 9 #include <algorithm>
10 #include <vector> 10 #include <vector>
(...skipping 91 matching lines...) Expand 10 before | Expand all | Expand 10 after
102 } 102 }
103 103
104 } // namespace 104 } // namespace
105 105
106 // static 106 // static
107 bool ScoredHistoryMatch::also_do_hup_like_scoring_; 107 bool ScoredHistoryMatch::also_do_hup_like_scoring_;
108 float ScoredHistoryMatch::bookmark_value_; 108 float ScoredHistoryMatch::bookmark_value_;
109 float ScoredHistoryMatch::typed_value_; 109 float ScoredHistoryMatch::typed_value_;
110 bool ScoredHistoryMatch::fix_few_visits_bug_; 110 bool ScoredHistoryMatch::fix_few_visits_bug_;
111 bool ScoredHistoryMatch::frequency_uses_sum_; 111 bool ScoredHistoryMatch::frequency_uses_sum_;
112 OmniboxFieldTrial::NumMatchesScores*
113 ScoredHistoryMatch::num_matches_to_document_specificity_score_ = nullptr;
112 size_t ScoredHistoryMatch::max_visits_to_score_; 114 size_t ScoredHistoryMatch::max_visits_to_score_;
113 bool ScoredHistoryMatch::allow_tld_matches_; 115 bool ScoredHistoryMatch::allow_tld_matches_;
114 bool ScoredHistoryMatch::allow_scheme_matches_; 116 bool ScoredHistoryMatch::allow_scheme_matches_;
115 size_t ScoredHistoryMatch::num_title_words_to_allow_; 117 size_t ScoredHistoryMatch::num_title_words_to_allow_;
116 bool ScoredHistoryMatch::hqp_experimental_scoring_enabled_; 118 bool ScoredHistoryMatch::hqp_experimental_scoring_enabled_;
117 119
118 // Default topicality threshold. See GetTopicalityScore() for details. 120 // Default topicality threshold. See GetTopicalityScore() for details.
119 float ScoredHistoryMatch::topicality_threshold_ = 0.8f; 121 float ScoredHistoryMatch::topicality_threshold_ = 0.8f;
120 122
121 // Default HQP relevance buckets. See GetFinalRelevancyScore() for more details 123 // Default HQP relevance buckets. See GetFinalRelevancyScore() for more details
122 // on these numbers. 124 // on these numbers.
123 char ScoredHistoryMatch::hqp_relevance_buckets_str_[] = 125 char ScoredHistoryMatch::hqp_relevance_buckets_str_[] =
124 "0.0:400,1.5:600,5.0:900,10.5:1203,15.0:1300,20.0:1399"; 126 "0.0:400,1.5:600,5.0:900,10.5:1203,15.0:1300,20.0:1399";
125 std::vector<ScoredHistoryMatch::ScoreMaxRelevance>* 127 std::vector<ScoredHistoryMatch::ScoreMaxRelevance>*
126 ScoredHistoryMatch::hqp_relevance_buckets_ = nullptr; 128 ScoredHistoryMatch::hqp_relevance_buckets_ = nullptr;
127 129
128 ScoredHistoryMatch::ScoredHistoryMatch() 130 ScoredHistoryMatch::ScoredHistoryMatch()
129 : ScoredHistoryMatch(history::URLRow(), 131 : ScoredHistoryMatch(history::URLRow(),
130 VisitInfoVector(), 132 VisitInfoVector(),
131 base::string16(), 133 base::string16(),
132 String16Vector(), 134 String16Vector(),
133 WordStarts(), 135 WordStarts(),
134 RowWordStarts(), 136 RowWordStarts(),
135 false, 137 false,
136 base::Time::Max()) { 138 1,
137 } 139 base::Time::Max()) {}
138 140
139 ScoredHistoryMatch::ScoredHistoryMatch( 141 ScoredHistoryMatch::ScoredHistoryMatch(
140 const history::URLRow& row, 142 const history::URLRow& row,
141 const VisitInfoVector& visits, 143 const VisitInfoVector& visits,
142 const base::string16& lower_string, 144 const base::string16& lower_string,
143 const String16Vector& terms_vector, 145 const String16Vector& terms_vector,
144 const WordStarts& terms_to_word_starts_offsets, 146 const WordStarts& terms_to_word_starts_offsets,
145 const RowWordStarts& word_starts, 147 const RowWordStarts& word_starts,
146 bool is_url_bookmarked, 148 bool is_url_bookmarked,
149 size_t num_matching_pages,
147 base::Time now) 150 base::Time now)
148 : HistoryMatch(row, 0, false, false), raw_score(0) { 151 : HistoryMatch(row, 0, false, false), raw_score(0) {
149 // NOTE: Call Init() before doing any validity checking to ensure that the 152 // NOTE: Call Init() before doing any validity checking to ensure that the
150 // class is always initialized after an instance has been constructed. In 153 // class is always initialized after an instance has been constructed. In
151 // particular, this ensures that the class is initialized after an instance 154 // particular, this ensures that the class is initialized after an instance
152 // has been constructed via the no-args constructor. 155 // has been constructed via the no-args constructor.
153 ScoredHistoryMatch::Init(); 156 ScoredHistoryMatch::Init();
154 157
155 // Figure out where each search term appears in the URL and/or page title 158 // Figure out where each search term appears in the URL and/or page title
156 // so that we can score as well as provide autocomplete highlighting. 159 // so that we can score as well as provide autocomplete highlighting.
(...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after
254 likely_can_inline = true; 257 likely_can_inline = true;
255 innermost_match = (best_inlineable_prefix->num_components == 258 innermost_match = (best_inlineable_prefix->num_components ==
256 best_prefix->num_components); 259 best_prefix->num_components);
257 } 260 }
258 } 261 }
259 } 262 }
260 263
261 const float topicality_score = GetTopicalityScore( 264 const float topicality_score = GetTopicalityScore(
262 terms_vector.size(), url, terms_to_word_starts_offsets, word_starts); 265 terms_vector.size(), url, terms_to_word_starts_offsets, word_starts);
263 const float frequency_score = GetFrequency(now, is_url_bookmarked, visits); 266 const float frequency_score = GetFrequency(now, is_url_bookmarked, visits);
264 raw_score = base::saturated_cast<int>(GetFinalRelevancyScore( 267 const float specificity_score =
265 topicality_score, frequency_score, *hqp_relevance_buckets_)); 268 GetDocumentSpecificityScore(num_matching_pages);
269 raw_score = base::saturated_cast<int>(
270 GetFinalRelevancyScore(topicality_score, frequency_score,
271 specificity_score, *hqp_relevance_buckets_));
266 272
267 if (also_do_hup_like_scoring_ && likely_can_inline) { 273 if (also_do_hup_like_scoring_ && likely_can_inline) {
268 // HistoryURL-provider-like scoring gives any match that is 274 // HistoryURL-provider-like scoring gives any match that is
269 // capable of being inlined a certain minimum score. Some of these 275 // capable of being inlined a certain minimum score. Some of these
270 // are given a higher score that lets them be shown in inline. 276 // are given a higher score that lets them be shown in inline.
271 // This test here derives from the test in 277 // This test here derives from the test in
272 // HistoryURLProvider::PromoteMatchForInlineAutocomplete(). 278 // HistoryURLProvider::PromoteMatchForInlineAutocomplete().
273 const bool promote_to_inline = 279 const bool promote_to_inline =
274 (row.typed_count() > 1) || (IsHostOnly() && (row.typed_count() == 1)); 280 (row.typed_count() > 1) || (IsHostOnly() && (row.typed_count() == 1));
275 int hup_like_score = 281 int hup_like_score =
(...skipping 129 matching lines...) Expand 10 before | Expand all | Expand 10 after
405 if (initialized) 411 if (initialized)
406 return; 412 return;
407 413
408 initialized = true; 414 initialized = true;
409 also_do_hup_like_scoring_ = OmniboxFieldTrial::HQPAlsoDoHUPLikeScoring(); 415 also_do_hup_like_scoring_ = OmniboxFieldTrial::HQPAlsoDoHUPLikeScoring();
410 bookmark_value_ = OmniboxFieldTrial::HQPBookmarkValue(); 416 bookmark_value_ = OmniboxFieldTrial::HQPBookmarkValue();
411 typed_value_ = OmniboxFieldTrial::HQPTypedValue(); 417 typed_value_ = OmniboxFieldTrial::HQPTypedValue();
412 max_visits_to_score_ = OmniboxFieldTrial::HQPMaxVisitsToScore(); 418 max_visits_to_score_ = OmniboxFieldTrial::HQPMaxVisitsToScore();
413 frequency_uses_sum_ = OmniboxFieldTrial::HQPFreqencyUsesSum(); 419 frequency_uses_sum_ = OmniboxFieldTrial::HQPFreqencyUsesSum();
414 fix_few_visits_bug_ = OmniboxFieldTrial::HQPFixFewVisitsBug(); 420 fix_few_visits_bug_ = OmniboxFieldTrial::HQPFixFewVisitsBug();
421 num_matches_to_document_specificity_score_ =
422 new OmniboxFieldTrial::NumMatchesScores();
423 (*num_matches_to_document_specificity_score_) =
424 OmniboxFieldTrial::HQPNumMatchesScores();
415 allow_tld_matches_ = OmniboxFieldTrial::HQPAllowMatchInTLDValue(); 425 allow_tld_matches_ = OmniboxFieldTrial::HQPAllowMatchInTLDValue();
416 allow_scheme_matches_ = OmniboxFieldTrial::HQPAllowMatchInSchemeValue(); 426 allow_scheme_matches_ = OmniboxFieldTrial::HQPAllowMatchInSchemeValue();
417 num_title_words_to_allow_ = OmniboxFieldTrial::HQPNumTitleWordsToAllow(); 427 num_title_words_to_allow_ = OmniboxFieldTrial::HQPNumTitleWordsToAllow();
418 428
419 InitRawTermScoreToTopicalityScoreArray(); 429 InitRawTermScoreToTopicalityScoreArray();
420 InitDaysAgoToRecencyScoreArray(); 430 InitDaysAgoToRecencyScoreArray();
421 InitHQPExperimentalParams(); 431 InitHQPExperimentalParams();
422 } 432 }
423 433
424 float ScoredHistoryMatch::GetTopicalityScore( 434 float ScoredHistoryMatch::GetTopicalityScore(
(...skipping 184 matching lines...) Expand 10 before | Expand all | Expand 10 after
609 // Compute the average weighted value_of_transition and return it. 619 // Compute the average weighted value_of_transition and return it.
610 // Use |max_visits_to_score_| as the denominator for the average regardless of 620 // Use |max_visits_to_score_| as the denominator for the average regardless of
611 // how many visits there were in order to penalize a match that has 621 // how many visits there were in order to penalize a match that has
612 // fewer visits than |max_visits_to_score_|. 622 // fewer visits than |max_visits_to_score_|.
613 if (fix_few_visits_bug_) 623 if (fix_few_visits_bug_)
614 return summed_visit_points / ScoredHistoryMatch::max_visits_to_score_; 624 return summed_visit_points / ScoredHistoryMatch::max_visits_to_score_;
615 return visits.size() * summed_visit_points / 625 return visits.size() * summed_visit_points /
616 ScoredHistoryMatch::max_visits_to_score_; 626 ScoredHistoryMatch::max_visits_to_score_;
617 } 627 }
618 628
629 float ScoredHistoryMatch::GetDocumentSpecificityScore(
630 const size_t num_matching_pages) const {
631 // The floating point value below doesn't matter.
Peter Kasting 2016/12/06 05:19:30 I'm not sure that's true. Since upper_bound() ret
Mark P 2016/12/08 00:21:31 Good point. I never imagined a field trial specif
632 OmniboxFieldTrial::NumMatchesScores::const_iterator it =
633 std::upper_bound(num_matches_to_document_specificity_score_->begin(),
634 num_matches_to_document_specificity_score_->end(),
635 std::pair<size_t, double>{num_matching_pages, 1.0});
636 if (it == num_matches_to_document_specificity_score_->end())
637 return 1.0;
638 return it->second;
Peter Kasting 2016/12/06 05:19:30 Nit: Could use ?:
Mark P 2016/12/08 00:21:31 Done (after reversing the order, as that read bett
Peter Kasting 2016/12/08 00:51:40 My concern with the order reversal is that it sort
Mark P 2016/12/08 04:37:35 I understand the point, but I find != end to read
639 };
640
619 // static 641 // static
620 float ScoredHistoryMatch::GetFinalRelevancyScore( 642 float ScoredHistoryMatch::GetFinalRelevancyScore(
621 float topicality_score, 643 float topicality_score,
622 float frequency_score, 644 float frequency_score,
645 float specificity_score,
623 const std::vector<ScoreMaxRelevance>& hqp_relevance_buckets) { 646 const std::vector<ScoreMaxRelevance>& hqp_relevance_buckets) {
624 DCHECK(hqp_relevance_buckets.size() > 0); 647 DCHECK(hqp_relevance_buckets.size() > 0);
625 DCHECK_EQ(hqp_relevance_buckets[0].first, 0.0); 648 DCHECK_EQ(hqp_relevance_buckets[0].first, 0.0);
626 649
627 if (topicality_score == 0) 650 if (topicality_score == 0)
628 return 0; 651 return 0;
629 // Here's how to interpret intermediate_score: Suppose the omnibox 652 // Here's how to interpret intermediate_score: Suppose the omnibox has one
630 // has one input term. Suppose we have a URL for which the omnibox 653 // input term. Suppose the input matches many documents. (This implies
654 // specificity_score == 1.0.) Suppose we have a URL for which the omnibox
631 // input term has a single URL hostname hit at a word boundary. (This 655 // input term has a single URL hostname hit at a word boundary. (This
632 // implies topicality_score = 1.0.) Then the intermediate_score for 656 // implies topicality_score = 1.0.) Then the intermediate_score for
633 // this URL will depend entirely on the frequency_score with 657 // this URL will depend entirely on the frequency_score with
634 // this interpretation: 658 // this interpretation:
635 // - a single typed visit more than three months ago, no other visits -> 0.2 659 // - a single typed visit more than three months ago, no other visits -> 0.2
636 // - a visit every three days, no typed visits -> 0.706 660 // - a visit every three days, no typed visits -> 0.706
637 // - a visit every day, no typed visits -> 0.916 661 // - a visit every day, no typed visits -> 0.916
638 // - a single typed visit yesterday, no other visits -> 2.0 662 // - a single typed visit yesterday, no other visits -> 2.0
639 // - a typed visit once a week -> 11.77 663 // - a typed visit once a week -> 11.77
640 // - a typed visit every three days -> 14.12 664 // - a typed visit every three days -> 14.12
641 // - at least ten typed visits today -> 20.0 (maximum score) 665 // - at least ten typed visits today -> 20.0 (maximum score)
642 // 666 //
643 // The below code maps intermediate_score to the range [0, 1399]. 667 // The below code maps intermediate_score to the range [0, 1399].
644 // For example: 668 // For example:
645 // Sample HQP scoring buckets: "0.0:400,1.5:600,12.0:1300,20.0:1399" 669 // Sample HQP scoring buckets: "0.0:400,1.5:600,12.0:1300,20.0:1399"
646 // We will linearly interpolate the scores between: 670 // We will linearly interpolate the scores between:
647 // 0 to 1.5 --> 400 to 600 671 // 0 to 1.5 --> 400 to 600
648 // 1.5 to 12.0 --> 600 to 1300 672 // 1.5 to 12.0 --> 600 to 1300
649 // 12.0 to 20.0 --> 1300 to 1399 673 // 12.0 to 20.0 --> 1300 to 1399
650 // >= 20.0 --> 1399 674 // >= 20.0 --> 1399
651 // 675 //
652 // The score maxes out at 1399 (i.e., cannot beat a good inlineable result 676 // The score maxes out at 1399 (i.e., cannot beat a good inlineable result
653 // from HistoryURL provider). 677 // from HistoryURL provider).
654 const float intermediate_score = topicality_score * frequency_score; 678 const float intermediate_score =
679 topicality_score * frequency_score * specificity_score;
655 680
656 // Find the threshold where intermediate score is greater than bucket. 681 // Find the threshold where intermediate score is greater than bucket.
657 size_t i = 1; 682 size_t i = 1;
658 for (; i < hqp_relevance_buckets.size(); ++i) { 683 for (; i < hqp_relevance_buckets.size(); ++i) {
659 const ScoreMaxRelevance& hqp_bucket = hqp_relevance_buckets[i]; 684 const ScoreMaxRelevance& hqp_bucket = hqp_relevance_buckets[i];
660 if (intermediate_score >= hqp_bucket.first) { 685 if (intermediate_score >= hqp_bucket.first) {
661 continue; 686 continue;
662 } 687 }
663 const ScoreMaxRelevance& previous_bucket = hqp_relevance_buckets[i - 1]; 688 const ScoreMaxRelevance& previous_bucket = hqp_relevance_buckets[i - 1];
664 const float slope = ((hqp_bucket.second - previous_bucket.second) / 689 const float slope = ((hqp_bucket.second - previous_bucket.second) /
(...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after
721 base::StringToDouble(it->first, &bucket.first); 746 base::StringToDouble(it->first, &bucket.first);
722 DCHECK(is_valid_intermediate_score); 747 DCHECK(is_valid_intermediate_score);
723 bool is_valid_hqp_score = base::StringToInt(it->second, &bucket.second); 748 bool is_valid_hqp_score = base::StringToInt(it->second, &bucket.second);
724 DCHECK(is_valid_hqp_score); 749 DCHECK(is_valid_hqp_score);
725 hqp_buckets->push_back(bucket); 750 hqp_buckets->push_back(bucket);
726 } 751 }
727 return true; 752 return true;
728 } 753 }
729 return false; 754 return false;
730 } 755 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698