Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(109)

Side by Side Diff: components/omnibox/browser/scored_history_match.cc

Issue 2541143002: Omnibox - Boost Frequency Scores Based on Number of Matching Pages (Closed)
Patch Set: fix rebase errors Created 4 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/omnibox/browser/scored_history_match.h" 5 #include "components/omnibox/browser/scored_history_match.h"
6 6
7 #include <math.h> 7 #include <math.h>
8 8
9 #include <algorithm> 9 #include <algorithm>
10 #include <vector> 10 #include <vector>
(...skipping 99 matching lines...) Expand 10 before | Expand all | Expand 10 after
110 float ScoredHistoryMatch::typed_value_; 110 float ScoredHistoryMatch::typed_value_;
111 bool ScoredHistoryMatch::fix_few_visits_bug_; 111 bool ScoredHistoryMatch::fix_few_visits_bug_;
112 bool ScoredHistoryMatch::frequency_uses_sum_; 112 bool ScoredHistoryMatch::frequency_uses_sum_;
113 size_t ScoredHistoryMatch::max_visits_to_score_; 113 size_t ScoredHistoryMatch::max_visits_to_score_;
114 bool ScoredHistoryMatch::allow_tld_matches_; 114 bool ScoredHistoryMatch::allow_tld_matches_;
115 bool ScoredHistoryMatch::allow_scheme_matches_; 115 bool ScoredHistoryMatch::allow_scheme_matches_;
116 size_t ScoredHistoryMatch::num_title_words_to_allow_; 116 size_t ScoredHistoryMatch::num_title_words_to_allow_;
117 float ScoredHistoryMatch::topicality_threshold_; 117 float ScoredHistoryMatch::topicality_threshold_;
118 ScoredHistoryMatch::ScoreMaxRelevances* 118 ScoredHistoryMatch::ScoreMaxRelevances*
119 ScoredHistoryMatch::relevance_buckets_override_ = nullptr; 119 ScoredHistoryMatch::relevance_buckets_override_ = nullptr;
120 OmniboxFieldTrial::NumMatchesScores*
121 ScoredHistoryMatch::matches_to_specificity_override_ = nullptr;
120 122
121 ScoredHistoryMatch::ScoredHistoryMatch() 123 ScoredHistoryMatch::ScoredHistoryMatch()
122 : ScoredHistoryMatch(history::URLRow(), 124 : ScoredHistoryMatch(history::URLRow(),
123 VisitInfoVector(), 125 VisitInfoVector(),
124 base::string16(), 126 base::string16(),
125 String16Vector(), 127 String16Vector(),
126 WordStarts(), 128 WordStarts(),
127 RowWordStarts(), 129 RowWordStarts(),
128 false, 130 false,
129 base::Time::Max()) { 131 1,
130 } 132 base::Time::Max()) {}
131 133
132 ScoredHistoryMatch::ScoredHistoryMatch( 134 ScoredHistoryMatch::ScoredHistoryMatch(
133 const history::URLRow& row, 135 const history::URLRow& row,
134 const VisitInfoVector& visits, 136 const VisitInfoVector& visits,
135 const base::string16& lower_string, 137 const base::string16& lower_string,
136 const String16Vector& terms_vector, 138 const String16Vector& terms_vector,
137 const WordStarts& terms_to_word_starts_offsets, 139 const WordStarts& terms_to_word_starts_offsets,
138 const RowWordStarts& word_starts, 140 const RowWordStarts& word_starts,
139 bool is_url_bookmarked, 141 bool is_url_bookmarked,
142 size_t num_matching_pages,
140 base::Time now) 143 base::Time now)
141 : HistoryMatch(row, 0, false, false), raw_score(0) { 144 : HistoryMatch(row, 0, false, false), raw_score(0) {
142 // NOTE: Call Init() before doing any validity checking to ensure that the 145 // NOTE: Call Init() before doing any validity checking to ensure that the
143 // class is always initialized after an instance has been constructed. In 146 // class is always initialized after an instance has been constructed. In
144 // particular, this ensures that the class is initialized after an instance 147 // particular, this ensures that the class is initialized after an instance
145 // has been constructed via the no-args constructor. 148 // has been constructed via the no-args constructor.
146 ScoredHistoryMatch::Init(); 149 ScoredHistoryMatch::Init();
147 150
148 // Figure out where each search term appears in the URL and/or page title 151 // Figure out where each search term appears in the URL and/or page title
149 // so that we can score as well as provide autocomplete highlighting. 152 // so that we can score as well as provide autocomplete highlighting.
(...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after
247 likely_can_inline = true; 250 likely_can_inline = true;
248 innermost_match = (best_inlineable_prefix->num_components == 251 innermost_match = (best_inlineable_prefix->num_components ==
249 best_prefix->num_components); 252 best_prefix->num_components);
250 } 253 }
251 } 254 }
252 } 255 }
253 256
254 const float topicality_score = GetTopicalityScore( 257 const float topicality_score = GetTopicalityScore(
255 terms_vector.size(), url, terms_to_word_starts_offsets, word_starts); 258 terms_vector.size(), url, terms_to_word_starts_offsets, word_starts);
256 const float frequency_score = GetFrequency(now, is_url_bookmarked, visits); 259 const float frequency_score = GetFrequency(now, is_url_bookmarked, visits);
257 raw_score = base::saturated_cast<int>( 260 const float specificity_score =
258 GetFinalRelevancyScore(topicality_score, frequency_score)); 261 GetDocumentSpecificityScore(num_matching_pages);
262 raw_score = base::saturated_cast<int>(GetFinalRelevancyScore(
263 topicality_score, frequency_score, specificity_score));
259 264
260 if (also_do_hup_like_scoring_ && likely_can_inline) { 265 if (also_do_hup_like_scoring_ && likely_can_inline) {
261 // HistoryURL-provider-like scoring gives any match that is 266 // HistoryURL-provider-like scoring gives any match that is
262 // capable of being inlined a certain minimum score. Some of these 267 // capable of being inlined a certain minimum score. Some of these
263 // are given a higher score that lets them be shown in inline. 268 // are given a higher score that lets them be shown in inline.
264 // This test here derives from the test in 269 // This test here derives from the test in
265 // HistoryURLProvider::PromoteMatchForInlineAutocomplete(). 270 // HistoryURLProvider::PromoteMatchForInlineAutocomplete().
266 const bool promote_to_inline = 271 const bool promote_to_inline =
267 (row.typed_count() > 1) || (IsHostOnly() && (row.typed_count() == 1)); 272 (row.typed_count() > 1) || (IsHostOnly() && (row.typed_count() == 1));
268 int hup_like_score = 273 int hup_like_score =
(...skipping 334 matching lines...) Expand 10 before | Expand all | Expand 10 after
603 // Compute the average weighted value_of_transition and return it. 608 // Compute the average weighted value_of_transition and return it.
604 // Use |max_visits_to_score_| as the denominator for the average regardless of 609 // Use |max_visits_to_score_| as the denominator for the average regardless of
605 // how many visits there were in order to penalize a match that has 610 // how many visits there were in order to penalize a match that has
606 // fewer visits than kMaxVisitsToScore. 611 // fewer visits than kMaxVisitsToScore.
607 if (fix_few_visits_bug_) 612 if (fix_few_visits_bug_)
608 return summed_visit_points / ScoredHistoryMatch::max_visits_to_score_; 613 return summed_visit_points / ScoredHistoryMatch::max_visits_to_score_;
609 return visits.size() * summed_visit_points / 614 return visits.size() * summed_visit_points /
610 ScoredHistoryMatch::max_visits_to_score_; 615 ScoredHistoryMatch::max_visits_to_score_;
611 } 616 }
612 617
618 float ScoredHistoryMatch::GetDocumentSpecificityScore(
619 size_t num_matching_pages) const {
620 // A mapping from the number of matching pages to their associated document
621 // specificity scores. See omnibox_field_trial.h for more details.
622 CR_DEFINE_STATIC_LOCAL(OmniboxFieldTrial::NumMatchesScores,
623 default_matches_to_specificity,
624 (OmniboxFieldTrial::HQPNumMatchesScores()));
625 OmniboxFieldTrial::NumMatchesScores* matches_to_specificity =
626 matches_to_specificity_override_ ? matches_to_specificity_override_
627 : &default_matches_to_specificity;
628
629 // The floating point value below must be less than the lowest score the
630 // server would send down.
631 OmniboxFieldTrial::NumMatchesScores::const_iterator it = std::upper_bound(
632 matches_to_specificity->begin(), matches_to_specificity->end(),
633 std::pair<size_t, double>{num_matching_pages, -1});
634 return (it != matches_to_specificity->end()) ? it->second : 1.0;
635 };
636
613 // static 637 // static
614 float ScoredHistoryMatch::GetFinalRelevancyScore(float topicality_score, 638 float ScoredHistoryMatch::GetFinalRelevancyScore(float topicality_score,
615 float frequency_score) { 639 float frequency_score,
640 float specificity_score) {
616 // |relevance_buckets| gives a mapping from intemerdiate score to the final 641 // |relevance_buckets| gives a mapping from intemerdiate score to the final
617 // relevance score. 642 // relevance score.
618 CR_DEFINE_STATIC_LOCAL(ScoreMaxRelevances, default_relevance_buckets, 643 CR_DEFINE_STATIC_LOCAL(ScoreMaxRelevances, default_relevance_buckets,
619 (GetHQPBuckets())); 644 (GetHQPBuckets()));
620 ScoreMaxRelevances* relevance_buckets = relevance_buckets_override_ 645 ScoreMaxRelevances* relevance_buckets = relevance_buckets_override_
621 ? relevance_buckets_override_ 646 ? relevance_buckets_override_
622 : &default_relevance_buckets; 647 : &default_relevance_buckets;
623 DCHECK(!relevance_buckets->empty()); 648 DCHECK(!relevance_buckets->empty());
624 DCHECK_EQ(0.0, (*relevance_buckets)[0].first); 649 DCHECK_EQ(0.0, (*relevance_buckets)[0].first);
625 650
626 if (topicality_score == 0) 651 if (topicality_score == 0)
627 return 0; 652 return 0;
628 // Here's how to interpret intermediate_score: Suppose the omnibox 653 // Here's how to interpret intermediate_score: Suppose the omnibox has one
629 // has one input term. Suppose we have a URL for which the omnibox 654 // input term. Suppose the input matches many documents. (This implies
655 // specificity_score == 1.0.) Suppose we have a URL for which the omnibox
630 // input term has a single URL hostname hit at a word boundary. (This 656 // input term has a single URL hostname hit at a word boundary. (This
631 // implies topicality_score = 1.0.). Then the intermediate_score for 657 // implies topicality_score = 1.0.). Then the intermediate_score for
632 // this URL will depend entirely on the frequency_score with 658 // this URL will depend entirely on the frequency_score with
633 // this interpretation: 659 // this interpretation:
634 // - a single typed visit more than three months ago, no other visits -> 0.2 660 // - a single typed visit more than three months ago, no other visits -> 0.2
635 // - a visit every three days, no typed visits -> 0.706 661 // - a visit every three days, no typed visits -> 0.706
636 // - a visit every day, no typed visits -> 0.916 662 // - a visit every day, no typed visits -> 0.916
637 // - a single typed visit yesterday, no other visits -> 2.0 663 // - a single typed visit yesterday, no other visits -> 2.0
638 // - a typed visit once a week -> 11.77 664 // - a typed visit once a week -> 11.77
639 // - a typed visit every three days -> 14.12 665 // - a typed visit every three days -> 14.12
640 // - at least ten typed visits today -> 20.0 (maximum score) 666 // - at least ten typed visits today -> 20.0 (maximum score)
641 // 667 //
642 // The below code maps intermediate_score to the range [0, 1399]. 668 // The below code maps intermediate_score to the range [0, 1399].
643 // For example: 669 // For example:
644 // The default scoring buckets: "0.0:400,1.5:600,12.0:1300,20.0:1399" 670 // The default scoring buckets: "0.0:400,1.5:600,12.0:1300,20.0:1399"
645 // We will linearly interpolate the scores between: 671 // We will linearly interpolate the scores between:
646 // 0 to 1.5 --> 400 to 600 672 // 0 to 1.5 --> 400 to 600
647 // 1.5 to 12.0 --> 600 to 1300 673 // 1.5 to 12.0 --> 600 to 1300
648 // 12.0 to 20.0 --> 1300 to 1399 674 // 12.0 to 20.0 --> 1300 to 1399
649 // >= 20.0 --> 1399 675 // >= 20.0 --> 1399
650 // 676 //
651 // The score maxes out at 1399 (i.e., cannot beat a good inlineable result 677 // The score maxes out at 1399 (i.e., cannot beat a good inlineable result
652 // from HistoryURL provider). 678 // from HistoryURL provider).
653 const float intermediate_score = topicality_score * frequency_score; 679 const float intermediate_score =
680 topicality_score * frequency_score * specificity_score;
654 681
655 // Find the threshold where intermediate score is greater than bucket. 682 // Find the threshold where intermediate score is greater than bucket.
656 size_t i = 1; 683 size_t i = 1;
657 for (; i < relevance_buckets->size(); ++i) { 684 for (; i < relevance_buckets->size(); ++i) {
658 const ScoreMaxRelevance& bucket = (*relevance_buckets)[i]; 685 const ScoreMaxRelevance& bucket = (*relevance_buckets)[i];
659 if (intermediate_score >= bucket.first) { 686 if (intermediate_score >= bucket.first) {
660 continue; 687 continue;
661 } 688 }
662 const ScoreMaxRelevance& previous_bucket = (*relevance_buckets)[i - 1]; 689 const ScoreMaxRelevance& previous_bucket = (*relevance_buckets)[i - 1];
663 const float slope = ((bucket.second - previous_bucket.second) / 690 const float slope = ((bucket.second - previous_bucket.second) /
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
696 ScoreMaxRelevance bucket; 723 ScoreMaxRelevance bucket;
697 bool is_valid_intermediate_score = 724 bool is_valid_intermediate_score =
698 base::StringToDouble(it->first, &bucket.first); 725 base::StringToDouble(it->first, &bucket.first);
699 DCHECK(is_valid_intermediate_score); 726 DCHECK(is_valid_intermediate_score);
700 bool is_valid_hqp_score = base::StringToInt(it->second, &bucket.second); 727 bool is_valid_hqp_score = base::StringToInt(it->second, &bucket.second);
701 DCHECK(is_valid_hqp_score); 728 DCHECK(is_valid_hqp_score);
702 hqp_buckets.push_back(bucket); 729 hqp_buckets.push_back(bucket);
703 } 730 }
704 return hqp_buckets; 731 return hqp_buckets;
705 } 732 }
OLDNEW
« no previous file with comments | « components/omnibox/browser/scored_history_match.h ('k') | components/omnibox/browser/scored_history_match_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698