components/omnibox/browser/scored_history_match.cc - Issue 2541143002: Omnibox - Boost Frequency Scores Based on Number of Matching Pages

Side by Side Diff: components/omnibox/browser/scored_history_match.cc

Issue 2541143002: Omnibox - Boost Frequency Scores Based on Number of Matching Pages (Closed)

Patch Set: fix rebase errors Created 4 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "components/omnibox/browser/scored_history_match.h"	5 #include "components/omnibox/browser/scored_history_match.h"

6	6

7 #include <math.h>	7 #include <math.h>

8	8

9 #include <algorithm>	9 #include <algorithm>

10 #include <vector>	10 #include <vector>

(...skipping 99 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
110 float ScoredHistoryMatch::typed_value_;	110 float ScoredHistoryMatch::typed_value_;

111 bool ScoredHistoryMatch::fix_few_visits_bug_;	111 bool ScoredHistoryMatch::fix_few_visits_bug_;

112 bool ScoredHistoryMatch::frequency_uses_sum_;	112 bool ScoredHistoryMatch::frequency_uses_sum_;

113 size_t ScoredHistoryMatch::max_visits_to_score_;	113 size_t ScoredHistoryMatch::max_visits_to_score_;

114 bool ScoredHistoryMatch::allow_tld_matches_;	114 bool ScoredHistoryMatch::allow_tld_matches_;

115 bool ScoredHistoryMatch::allow_scheme_matches_;	115 bool ScoredHistoryMatch::allow_scheme_matches_;

116 size_t ScoredHistoryMatch::num_title_words_to_allow_;	116 size_t ScoredHistoryMatch::num_title_words_to_allow_;

117 float ScoredHistoryMatch::topicality_threshold_;	117 float ScoredHistoryMatch::topicality_threshold_;

118 ScoredHistoryMatch::ScoreMaxRelevances*	118 ScoredHistoryMatch::ScoreMaxRelevances*

119 ScoredHistoryMatch::relevance_buckets_override_ = nullptr;	119 ScoredHistoryMatch::relevance_buckets_override_ = nullptr;

	120 OmniboxFieldTrial::NumMatchesScores*

	121 ScoredHistoryMatch::matches_to_specificity_override_ = nullptr;

120	122

121 ScoredHistoryMatch::ScoredHistoryMatch()	123 ScoredHistoryMatch::ScoredHistoryMatch()

122 : ScoredHistoryMatch(history::URLRow(),	124 : ScoredHistoryMatch(history::URLRow(),

123 VisitInfoVector(),	125 VisitInfoVector(),

124 base::string16(),	126 base::string16(),

125 String16Vector(),	127 String16Vector(),

126 WordStarts(),	128 WordStarts(),

127 RowWordStarts(),	129 RowWordStarts(),

128 false,	130 false,

129 base::Time::Max()) {	131 1,

130 }	132 base::Time::Max()) {}

131	133

132 ScoredHistoryMatch::ScoredHistoryMatch(	134 ScoredHistoryMatch::ScoredHistoryMatch(

133 const history::URLRow& row,	135 const history::URLRow& row,

134 const VisitInfoVector& visits,	136 const VisitInfoVector& visits,

135 const base::string16& lower_string,	137 const base::string16& lower_string,

136 const String16Vector& terms_vector,	138 const String16Vector& terms_vector,

137 const WordStarts& terms_to_word_starts_offsets,	139 const WordStarts& terms_to_word_starts_offsets,

138 const RowWordStarts& word_starts,	140 const RowWordStarts& word_starts,

139 bool is_url_bookmarked,	141 bool is_url_bookmarked,

	142 size_t num_matching_pages,

140 base::Time now)	143 base::Time now)

141 : HistoryMatch(row, 0, false, false), raw_score(0) {	144 : HistoryMatch(row, 0, false, false), raw_score(0) {

142 // NOTE: Call Init() before doing any validity checking to ensure that the	145 // NOTE: Call Init() before doing any validity checking to ensure that the

143 // class is always initialized after an instance has been constructed. In	146 // class is always initialized after an instance has been constructed. In

144 // particular, this ensures that the class is initialized after an instance	147 // particular, this ensures that the class is initialized after an instance

145 // has been constructed via the no-args constructor.	148 // has been constructed via the no-args constructor.

146 ScoredHistoryMatch::Init();	149 ScoredHistoryMatch::Init();

147	150

148 // Figure out where each search term appears in the URL and/or page title	151 // Figure out where each search term appears in the URL and/or page title

149 // so that we can score as well as provide autocomplete highlighting.	152 // so that we can score as well as provide autocomplete highlighting.

(...skipping 97 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
247 likely_can_inline = true;	250 likely_can_inline = true;

248 innermost_match = (best_inlineable_prefix->num_components ==	251 innermost_match = (best_inlineable_prefix->num_components ==

249 best_prefix->num_components);	252 best_prefix->num_components);

250 }	253 }

251 }	254 }

252 }	255 }

253	256

254 const float topicality_score = GetTopicalityScore(	257 const float topicality_score = GetTopicalityScore(

255 terms_vector.size(), url, terms_to_word_starts_offsets, word_starts);	258 terms_vector.size(), url, terms_to_word_starts_offsets, word_starts);

256 const float frequency_score = GetFrequency(now, is_url_bookmarked, visits);	259 const float frequency_score = GetFrequency(now, is_url_bookmarked, visits);

257 raw_score = base::saturated_cast<int>(	260 const float specificity_score =

258 GetFinalRelevancyScore(topicality_score, frequency_score));	261 GetDocumentSpecificityScore(num_matching_pages);

	262 raw_score = base::saturated_cast<int>(GetFinalRelevancyScore(

	263 topicality_score, frequency_score, specificity_score));

259	264

260 if (also_do_hup_like_scoring_ && likely_can_inline) {	265 if (also_do_hup_like_scoring_ && likely_can_inline) {

261 // HistoryURL-provider-like scoring gives any match that is	266 // HistoryURL-provider-like scoring gives any match that is

262 // capable of being inlined a certain minimum score. Some of these	267 // capable of being inlined a certain minimum score. Some of these

263 // are given a higher score that lets them be shown in inline.	268 // are given a higher score that lets them be shown in inline.

264 // This test here derives from the test in	269 // This test here derives from the test in

265 // HistoryURLProvider::PromoteMatchForInlineAutocomplete().	270 // HistoryURLProvider::PromoteMatchForInlineAutocomplete().

266 const bool promote_to_inline =	271 const bool promote_to_inline =

267 (row.typed_count() > 1) \|\| (IsHostOnly() && (row.typed_count() == 1));	272 (row.typed_count() > 1) \|\| (IsHostOnly() && (row.typed_count() == 1));

268 int hup_like_score =	273 int hup_like_score =

(...skipping 334 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
603 // Compute the average weighted value_of_transition and return it.	608 // Compute the average weighted value_of_transition and return it.

604 // Use \|max_visits_to_score_\| as the denominator for the average regardless of	609 // Use \|max_visits_to_score_\| as the denominator for the average regardless of

605 // how many visits there were in order to penalize a match that has	610 // how many visits there were in order to penalize a match that has

606 // fewer visits than kMaxVisitsToScore.	611 // fewer visits than kMaxVisitsToScore.

607 if (fix_few_visits_bug_)	612 if (fix_few_visits_bug_)

608 return summed_visit_points / ScoredHistoryMatch::max_visits_to_score_;	613 return summed_visit_points / ScoredHistoryMatch::max_visits_to_score_;

609 return visits.size() * summed_visit_points /	614 return visits.size() * summed_visit_points /

610 ScoredHistoryMatch::max_visits_to_score_;	615 ScoredHistoryMatch::max_visits_to_score_;

611 }	616 }

612	617

	618 float ScoredHistoryMatch::GetDocumentSpecificityScore(

	619 size_t num_matching_pages) const {

	620 // A mapping from the number of matching pages to their associated document

	621 // specificity scores. See omnibox_field_trial.h for more details.

	622 CR_DEFINE_STATIC_LOCAL(OmniboxFieldTrial::NumMatchesScores,

	623 default_matches_to_specificity,

	624 (OmniboxFieldTrial::HQPNumMatchesScores()));

	625 OmniboxFieldTrial::NumMatchesScores* matches_to_specificity =

	626 matches_to_specificity_override_ ? matches_to_specificity_override_

	627 : &default_matches_to_specificity;

	628

	629 // The floating point value below must be less than the lowest score the

	630 // server would send down.

	631 OmniboxFieldTrial::NumMatchesScores::const_iterator it = std::upper_bound(

	632 matches_to_specificity->begin(), matches_to_specificity->end(),

	633 std::pair<size_t, double>{num_matching_pages, -1});

	634 return (it != matches_to_specificity->end()) ? it->second : 1.0;

	635 };

	636

613 // static	637 // static

614 float ScoredHistoryMatch::GetFinalRelevancyScore(float topicality_score,	638 float ScoredHistoryMatch::GetFinalRelevancyScore(float topicality_score,

615 float frequency_score) {	639 float frequency_score,

	640 float specificity_score) {

616 // \|relevance_buckets\| gives a mapping from intemerdiate score to the final	641 // \|relevance_buckets\| gives a mapping from intemerdiate score to the final

617 // relevance score.	642 // relevance score.

618 CR_DEFINE_STATIC_LOCAL(ScoreMaxRelevances, default_relevance_buckets,	643 CR_DEFINE_STATIC_LOCAL(ScoreMaxRelevances, default_relevance_buckets,

619 (GetHQPBuckets()));	644 (GetHQPBuckets()));

620 ScoreMaxRelevances* relevance_buckets = relevance_buckets_override_	645 ScoreMaxRelevances* relevance_buckets = relevance_buckets_override_

621 ? relevance_buckets_override_	646 ? relevance_buckets_override_

622 : &default_relevance_buckets;	647 : &default_relevance_buckets;

623 DCHECK(!relevance_buckets->empty());	648 DCHECK(!relevance_buckets->empty());

624 DCHECK_EQ(0.0, (*relevance_buckets)[0].first);	649 DCHECK_EQ(0.0, (*relevance_buckets)[0].first);

625	650

626 if (topicality_score == 0)	651 if (topicality_score == 0)

627 return 0;	652 return 0;

628 // Here's how to interpret intermediate_score: Suppose the omnibox	653 // Here's how to interpret intermediate_score: Suppose the omnibox has one

629 // has one input term. Suppose we have a URL for which the omnibox	654 // input term. Suppose the input matches many documents. (This implies

	655 // specificity_score == 1.0.) Suppose we have a URL for which the omnibox

630 // input term has a single URL hostname hit at a word boundary. (This	656 // input term has a single URL hostname hit at a word boundary. (This

631 // implies topicality_score = 1.0.). Then the intermediate_score for	657 // implies topicality_score = 1.0.). Then the intermediate_score for

632 // this URL will depend entirely on the frequency_score with	658 // this URL will depend entirely on the frequency_score with

633 // this interpretation:	659 // this interpretation:

634 // - a single typed visit more than three months ago, no other visits -> 0.2	660 // - a single typed visit more than three months ago, no other visits -> 0.2

635 // - a visit every three days, no typed visits -> 0.706	661 // - a visit every three days, no typed visits -> 0.706

636 // - a visit every day, no typed visits -> 0.916	662 // - a visit every day, no typed visits -> 0.916

637 // - a single typed visit yesterday, no other visits -> 2.0	663 // - a single typed visit yesterday, no other visits -> 2.0

638 // - a typed visit once a week -> 11.77	664 // - a typed visit once a week -> 11.77

639 // - a typed visit every three days -> 14.12	665 // - a typed visit every three days -> 14.12

640 // - at least ten typed visits today -> 20.0 (maximum score)	666 // - at least ten typed visits today -> 20.0 (maximum score)

641 //	667 //

642 // The below code maps intermediate_score to the range [0, 1399].	668 // The below code maps intermediate_score to the range [0, 1399].

643 // For example:	669 // For example:

644 // The default scoring buckets: "0.0:400,1.5:600,12.0:1300,20.0:1399"	670 // The default scoring buckets: "0.0:400,1.5:600,12.0:1300,20.0:1399"

645 // We will linearly interpolate the scores between:	671 // We will linearly interpolate the scores between:

646 // 0 to 1.5 --> 400 to 600	672 // 0 to 1.5 --> 400 to 600

647 // 1.5 to 12.0 --> 600 to 1300	673 // 1.5 to 12.0 --> 600 to 1300

648 // 12.0 to 20.0 --> 1300 to 1399	674 // 12.0 to 20.0 --> 1300 to 1399

649 // >= 20.0 --> 1399	675 // >= 20.0 --> 1399

650 //	676 //

651 // The score maxes out at 1399 (i.e., cannot beat a good inlineable result	677 // The score maxes out at 1399 (i.e., cannot beat a good inlineable result

652 // from HistoryURL provider).	678 // from HistoryURL provider).

653 const float intermediate_score = topicality_score * frequency_score;	679 const float intermediate_score =

	680 topicality_score * frequency_score * specificity_score;

654	681

655 // Find the threshold where intermediate score is greater than bucket.	682 // Find the threshold where intermediate score is greater than bucket.

656 size_t i = 1;	683 size_t i = 1;

657 for (; i < relevance_buckets->size(); ++i) {	684 for (; i < relevance_buckets->size(); ++i) {

658 const ScoreMaxRelevance& bucket = (*relevance_buckets)[i];	685 const ScoreMaxRelevance& bucket = (*relevance_buckets)[i];

659 if (intermediate_score >= bucket.first) {	686 if (intermediate_score >= bucket.first) {

660 continue;	687 continue;

661 }	688 }

662 const ScoreMaxRelevance& previous_bucket = (*relevance_buckets)[i - 1];	689 const ScoreMaxRelevance& previous_bucket = (*relevance_buckets)[i - 1];

663 const float slope = ((bucket.second - previous_bucket.second) /	690 const float slope = ((bucket.second - previous_bucket.second) /

(...skipping 32 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
696 ScoreMaxRelevance bucket;	723 ScoreMaxRelevance bucket;

697 bool is_valid_intermediate_score =	724 bool is_valid_intermediate_score =

698 base::StringToDouble(it->first, &bucket.first);	725 base::StringToDouble(it->first, &bucket.first);

699 DCHECK(is_valid_intermediate_score);	726 DCHECK(is_valid_intermediate_score);

700 bool is_valid_hqp_score = base::StringToInt(it->second, &bucket.second);	727 bool is_valid_hqp_score = base::StringToInt(it->second, &bucket.second);

701 DCHECK(is_valid_hqp_score);	728 DCHECK(is_valid_hqp_score);

702 hqp_buckets.push_back(bucket);	729 hqp_buckets.push_back(bucket);

703 }	730 }

704 return hqp_buckets;	731 return hqp_buckets;

705 }	732 }

OLD	NEW

« no previous file with comments | « components/omnibox/browser/scored_history_match.h ('k') | components/omnibox/browser/scored_history_match_unittest.cc » ('j') | no next file with comments »