Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(13)

Side by Side Diff: components/omnibox/browser/scored_history_match.cc

Issue 2541143002: Omnibox - Boost Frequency Scores Based on Number of Matching Pages (Closed)
Patch Set: remove setup/teardown test case code Created 4 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/omnibox/browser/scored_history_match.h" 5 #include "components/omnibox/browser/scored_history_match.h"
6 6
7 #include <math.h> 7 #include <math.h>
8 8
9 #include <algorithm> 9 #include <algorithm>
10 #include <vector> 10 #include <vector>
(...skipping 91 matching lines...) Expand 10 before | Expand all | Expand 10 after
102 } 102 }
103 103
104 } // namespace 104 } // namespace
105 105
106 // static 106 // static
107 bool ScoredHistoryMatch::also_do_hup_like_scoring_; 107 bool ScoredHistoryMatch::also_do_hup_like_scoring_;
108 float ScoredHistoryMatch::bookmark_value_; 108 float ScoredHistoryMatch::bookmark_value_;
109 float ScoredHistoryMatch::typed_value_; 109 float ScoredHistoryMatch::typed_value_;
110 bool ScoredHistoryMatch::fix_few_visits_bug_; 110 bool ScoredHistoryMatch::fix_few_visits_bug_;
111 bool ScoredHistoryMatch::frequency_uses_sum_; 111 bool ScoredHistoryMatch::frequency_uses_sum_;
112 OmniboxFieldTrial::NumMatchesMultipliers*
113 ScoredHistoryMatch::num_matches_to_frequency_multiplier_ = nullptr;
112 size_t ScoredHistoryMatch::max_visits_to_score_; 114 size_t ScoredHistoryMatch::max_visits_to_score_;
113 bool ScoredHistoryMatch::allow_tld_matches_; 115 bool ScoredHistoryMatch::allow_tld_matches_;
114 bool ScoredHistoryMatch::allow_scheme_matches_; 116 bool ScoredHistoryMatch::allow_scheme_matches_;
115 size_t ScoredHistoryMatch::num_title_words_to_allow_; 117 size_t ScoredHistoryMatch::num_title_words_to_allow_;
116 bool ScoredHistoryMatch::hqp_experimental_scoring_enabled_; 118 bool ScoredHistoryMatch::hqp_experimental_scoring_enabled_;
117 119
118 // Default topicality threshold. See GetTopicalityScore() for details. 120 // Default topicality threshold. See GetTopicalityScore() for details.
119 float ScoredHistoryMatch::topicality_threshold_ = 0.8f; 121 float ScoredHistoryMatch::topicality_threshold_ = 0.8f;
120 122
121 // Default HQP relevance buckets. See GetFinalRelevancyScore() for more details 123 // Default HQP relevance buckets. See GetFinalRelevancyScore() for more details
122 // on these numbers. 124 // on these numbers.
123 char ScoredHistoryMatch::hqp_relevance_buckets_str_[] = 125 char ScoredHistoryMatch::hqp_relevance_buckets_str_[] =
124 "0.0:400,1.5:600,5.0:900,10.5:1203,15.0:1300,20.0:1399"; 126 "0.0:400,1.5:600,5.0:900,10.5:1203,15.0:1300,20.0:1399";
125 std::vector<ScoredHistoryMatch::ScoreMaxRelevance>* 127 std::vector<ScoredHistoryMatch::ScoreMaxRelevance>*
126 ScoredHistoryMatch::hqp_relevance_buckets_ = nullptr; 128 ScoredHistoryMatch::hqp_relevance_buckets_ = nullptr;
127 129
128 ScoredHistoryMatch::ScoredHistoryMatch() 130 ScoredHistoryMatch::ScoredHistoryMatch()
129 : ScoredHistoryMatch(history::URLRow(), 131 : ScoredHistoryMatch(history::URLRow(),
130 VisitInfoVector(), 132 VisitInfoVector(),
131 base::string16(), 133 base::string16(),
132 String16Vector(), 134 String16Vector(),
133 WordStarts(), 135 WordStarts(),
134 RowWordStarts(), 136 RowWordStarts(),
135 false, 137 false,
136 base::Time::Max()) { 138 1,
137 } 139 base::Time::Max()) {}
138 140
139 ScoredHistoryMatch::ScoredHistoryMatch( 141 ScoredHistoryMatch::ScoredHistoryMatch(
140 const history::URLRow& row, 142 const history::URLRow& row,
141 const VisitInfoVector& visits, 143 const VisitInfoVector& visits,
142 const base::string16& lower_string, 144 const base::string16& lower_string,
143 const String16Vector& terms_vector, 145 const String16Vector& terms_vector,
144 const WordStarts& terms_to_word_starts_offsets, 146 const WordStarts& terms_to_word_starts_offsets,
145 const RowWordStarts& word_starts, 147 const RowWordStarts& word_starts,
146 bool is_url_bookmarked, 148 bool is_url_bookmarked,
149 size_t num_matching_pages,
147 base::Time now) 150 base::Time now)
148 : HistoryMatch(row, 0, false, false), raw_score(0) { 151 : HistoryMatch(row, 0, false, false), raw_score(0) {
149 // NOTE: Call Init() before doing any validity checking to ensure that the 152 // NOTE: Call Init() before doing any validity checking to ensure that the
150 // class is always initialized after an instance has been constructed. In 153 // class is always initialized after an instance has been constructed. In
151 // particular, this ensures that the class is initialized after an instance 154 // particular, this ensures that the class is initialized after an instance
152 // has been constructed via the no-args constructor. 155 // has been constructed via the no-args constructor.
153 ScoredHistoryMatch::Init(); 156 ScoredHistoryMatch::Init();
154 157
155 // Figure out where each search term appears in the URL and/or page title 158 // Figure out where each search term appears in the URL and/or page title
156 // so that we can score as well as provide autocomplete highlighting. 159 // so that we can score as well as provide autocomplete highlighting.
(...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after
253 // the prefix that makes it inlineable may be empty. 256 // the prefix that makes it inlineable may be empty.
254 likely_can_inline = true; 257 likely_can_inline = true;
255 innermost_match = (best_inlineable_prefix->num_components == 258 innermost_match = (best_inlineable_prefix->num_components ==
256 best_prefix->num_components); 259 best_prefix->num_components);
257 } 260 }
258 } 261 }
259 } 262 }
260 263
261 const float topicality_score = GetTopicalityScore( 264 const float topicality_score = GetTopicalityScore(
262 terms_vector.size(), url, terms_to_word_starts_offsets, word_starts); 265 terms_vector.size(), url, terms_to_word_starts_offsets, word_starts);
263 const float frequency_score = GetFrequency(now, is_url_bookmarked, visits); 266 const float frequency_score =
267 GetFrequency(now, is_url_bookmarked, visits, num_matching_pages);
264 raw_score = base::saturated_cast<int>(GetFinalRelevancyScore( 268 raw_score = base::saturated_cast<int>(GetFinalRelevancyScore(
265 topicality_score, frequency_score, *hqp_relevance_buckets_)); 269 topicality_score, frequency_score, *hqp_relevance_buckets_));
266 270
267 if (also_do_hup_like_scoring_ && likely_can_inline) { 271 if (also_do_hup_like_scoring_ && likely_can_inline) {
268 // HistoryURL-provider-like scoring gives any match that is 272 // HistoryURL-provider-like scoring gives any match that is
269 // capable of being inlined a certain minimum score. Some of these 273 // capable of being inlined a certain minimum score. Some of these
270 // are given a higher score that lets them be shown in inline. 274 // are given a higher score that lets them be shown in inline.
271 // This test here derives from the test in 275 // This test here derives from the test in
272 // HistoryURLProvider::PromoteMatchForInlineAutocomplete(). 276 // HistoryURLProvider::PromoteMatchForInlineAutocomplete().
273 const bool promote_to_inline = 277 const bool promote_to_inline =
(...skipping 131 matching lines...) Expand 10 before | Expand all | Expand 10 after
405 if (initialized) 409 if (initialized)
406 return; 410 return;
407 411
408 initialized = true; 412 initialized = true;
409 also_do_hup_like_scoring_ = OmniboxFieldTrial::HQPAlsoDoHUPLikeScoring(); 413 also_do_hup_like_scoring_ = OmniboxFieldTrial::HQPAlsoDoHUPLikeScoring();
410 bookmark_value_ = OmniboxFieldTrial::HQPBookmarkValue(); 414 bookmark_value_ = OmniboxFieldTrial::HQPBookmarkValue();
411 typed_value_ = OmniboxFieldTrial::HQPTypedValue(); 415 typed_value_ = OmniboxFieldTrial::HQPTypedValue();
412 max_visits_to_score_ = OmniboxFieldTrial::HQPMaxVisitsToScore(); 416 max_visits_to_score_ = OmniboxFieldTrial::HQPMaxVisitsToScore();
413 frequency_uses_sum_ = OmniboxFieldTrial::HQPFreqencyUsesSum(); 417 frequency_uses_sum_ = OmniboxFieldTrial::HQPFreqencyUsesSum();
414 fix_few_visits_bug_ = OmniboxFieldTrial::HQPFixFewVisitsBug(); 418 fix_few_visits_bug_ = OmniboxFieldTrial::HQPFixFewVisitsBug();
419 num_matches_to_frequency_multiplier_ =
420 new OmniboxFieldTrial::NumMatchesMultipliers();
Peter Kasting 2016/12/01 07:07:53 This leaks. If you want a leaky object of class t
Mark P 2016/12/04 01:06:42 Still TODO.
Mark P 2016/12/06 21:02:47 I don't understand how this macro should be used i
Peter Kasting 2016/12/06 21:41:27 There are two ways to do this I can think of: (1)
Mark P 2016/12/08 00:21:31 Thanks for the explanation. I'll do something lik
421 OmniboxFieldTrial::HQPGetNumMatchesMultipliers(
422 num_matches_to_frequency_multiplier_);
415 allow_tld_matches_ = OmniboxFieldTrial::HQPAllowMatchInTLDValue(); 423 allow_tld_matches_ = OmniboxFieldTrial::HQPAllowMatchInTLDValue();
416 allow_scheme_matches_ = OmniboxFieldTrial::HQPAllowMatchInSchemeValue(); 424 allow_scheme_matches_ = OmniboxFieldTrial::HQPAllowMatchInSchemeValue();
417 num_title_words_to_allow_ = OmniboxFieldTrial::HQPNumTitleWordsToAllow(); 425 num_title_words_to_allow_ = OmniboxFieldTrial::HQPNumTitleWordsToAllow();
418 426
419 InitRawTermScoreToTopicalityScoreArray(); 427 InitRawTermScoreToTopicalityScoreArray();
420 InitDaysAgoToRecencyScoreArray(); 428 InitDaysAgoToRecencyScoreArray();
421 InitHQPExperimentalParams(); 429 InitHQPExperimentalParams();
422 } 430 }
423 431
424 float ScoredHistoryMatch::GetTopicalityScore( 432 float ScoredHistoryMatch::GetTopicalityScore(
(...skipping 149 matching lines...) Expand 10 before | Expand all | Expand 10 after
574 // Lookup the score in days_ago_to_recency_score, treating 582 // Lookup the score in days_ago_to_recency_score, treating
575 // everything older than what we've precomputed as the oldest thing 583 // everything older than what we've precomputed as the oldest thing
576 // we've precomputed. The std::max is to protect against corruption 584 // we've precomputed. The std::max is to protect against corruption
577 // in the database (in case last_visit_days_ago is negative). 585 // in the database (in case last_visit_days_ago is negative).
578 return days_ago_to_recency_score[std::max( 586 return days_ago_to_recency_score[std::max(
579 std::min(last_visit_days_ago, kDaysToPrecomputeRecencyScoresFor - 1), 0)]; 587 std::min(last_visit_days_ago, kDaysToPrecomputeRecencyScoresFor - 1), 0)];
580 } 588 }
581 589
582 float ScoredHistoryMatch::GetFrequency(const base::Time& now, 590 float ScoredHistoryMatch::GetFrequency(const base::Time& now,
583 const bool bookmarked, 591 const bool bookmarked,
584 const VisitInfoVector& visits) const { 592 const VisitInfoVector& visits,
593 const size_t num_matching_pages) const {
585 // Compute the weighted sum of |value_of_transition| over the last at most 594 // Compute the weighted sum of |value_of_transition| over the last at most
586 // |max_visits_to_score_| visits, where each visit is weighted using 595 // |max_visits_to_score_| visits, where each visit is weighted using
587 // GetRecencyScore() based on how many days ago it happened. 596 // GetRecencyScore() based on how many days ago it happened.
588 float summed_visit_points = 0; 597 float summed_visit_points = 0;
589 auto visits_end = 598 auto visits_end =
590 visits.begin() + std::min(visits.size(), max_visits_to_score_); 599 visits.begin() + std::min(visits.size(), max_visits_to_score_);
591 // Visits should be in newest to oldest order. 600 // Visits should be in newest to oldest order.
592 DCHECK(std::adjacent_find( 601 DCHECK(std::adjacent_find(
593 visits.begin(), visits_end, 602 visits.begin(), visits_end,
594 [](const history::VisitInfo& a, const history::VisitInfo& b) { 603 [](const history::VisitInfo& a, const history::VisitInfo& b) {
595 return a.first < b.first; 604 return a.first < b.first;
596 }) == visits_end); 605 }) == visits_end);
597 for (auto i = visits.begin(); i != visits_end; ++i) { 606 for (auto i = visits.begin(); i != visits_end; ++i) {
598 const bool is_page_transition_typed = 607 const bool is_page_transition_typed =
599 ui::PageTransitionCoreTypeIs(i->second, ui::PAGE_TRANSITION_TYPED); 608 ui::PageTransitionCoreTypeIs(i->second, ui::PAGE_TRANSITION_TYPED);
600 float value_of_transition = is_page_transition_typed ? typed_value_ : 1; 609 float value_of_transition = is_page_transition_typed ? typed_value_ : 1;
601 if (bookmarked) 610 if (bookmarked)
602 value_of_transition = std::max(value_of_transition, bookmark_value_); 611 value_of_transition = std::max(value_of_transition, bookmark_value_);
603 const float bucket_weight = GetRecencyScore((now - i->first).InDays()); 612 const float bucket_weight = GetRecencyScore((now - i->first).InDays());
604 summed_visit_points += (value_of_transition * bucket_weight); 613 summed_visit_points += (value_of_transition * bucket_weight);
605 } 614 }
606 if (frequency_uses_sum_) 615 float frequency;
607 return summed_visit_points; 616 if (frequency_uses_sum_) {
608 617 frequency = summed_visit_points;
609 // Compute the average weighted value_of_transition and return it. 618 } else {
610 // Use |max_visits_to_score_| as the denominator for the average regardless of 619 // Compute the average weighted value_of_transition and return it.
611 // how many visits there were in order to penalize a match that has 620 // Use |max_visits_to_score_| as the denominator for the average regardless
612 // fewer visits than kMaxVisitsToScore. 621 // of how many visits there were in order to penalize a match that has
613 if (fix_few_visits_bug_) 622 // fewer visits than kMaxVisitsToScore.
614 return summed_visit_points / ScoredHistoryMatch::max_visits_to_score_; 623 if (fix_few_visits_bug_) {
615 return visits.size() * summed_visit_points / 624 frequency =
616 ScoredHistoryMatch::max_visits_to_score_; 625 summed_visit_points / ScoredHistoryMatch::max_visits_to_score_;
626 } else {
627 frequency = visits.size() * summed_visit_points /
628 ScoredHistoryMatch::max_visits_to_score_;
629 }
630 }
631 // Boost the score if applicable.
632 if (num_matches_to_frequency_multiplier_->find(num_matching_pages) !=
633 num_matches_to_frequency_multiplier_->end())
634 frequency *= (*num_matches_to_frequency_multiplier_)[num_matching_pages];
635 return frequency;
Peter Kasting 2016/12/01 07:07:53 I don't love putting this here. Part of this is c
Mark P 2016/12/04 01:06:42 I agree with your reasoning below. After some tho
617 } 636 }
618 637
619 // static 638 // static
620 float ScoredHistoryMatch::GetFinalRelevancyScore( 639 float ScoredHistoryMatch::GetFinalRelevancyScore(
621 float topicality_score, 640 float topicality_score,
622 float frequency_score, 641 float frequency_score,
623 const std::vector<ScoreMaxRelevance>& hqp_relevance_buckets) { 642 const std::vector<ScoreMaxRelevance>& hqp_relevance_buckets) {
624 DCHECK(hqp_relevance_buckets.size() > 0); 643 DCHECK(hqp_relevance_buckets.size() > 0);
625 DCHECK_EQ(hqp_relevance_buckets[0].first, 0.0); 644 DCHECK_EQ(hqp_relevance_buckets[0].first, 0.0);
626 645
(...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after
721 base::StringToDouble(it->first, &bucket.first); 740 base::StringToDouble(it->first, &bucket.first);
722 DCHECK(is_valid_intermediate_score); 741 DCHECK(is_valid_intermediate_score);
723 bool is_valid_hqp_score = base::StringToInt(it->second, &bucket.second); 742 bool is_valid_hqp_score = base::StringToInt(it->second, &bucket.second);
724 DCHECK(is_valid_hqp_score); 743 DCHECK(is_valid_hqp_score);
725 hqp_buckets->push_back(bucket); 744 hqp_buckets->push_back(bucket);
726 } 745 }
727 return true; 746 return true;
728 } 747 }
729 return false; 748 return false;
730 } 749 }
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698