Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(420)

Side by Side Diff: components/omnibox/browser/scored_history_match.h

Issue 2541143002: Omnibox - Boost Frequency Scores Based on Number of Matching Pages (Closed)
Patch Set: improved comments and formatting Created 4 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #ifndef COMPONENTS_OMNIBOX_BROWSER_SCORED_HISTORY_MATCH_H_ 5 #ifndef COMPONENTS_OMNIBOX_BROWSER_SCORED_HISTORY_MATCH_H_
6 #define COMPONENTS_OMNIBOX_BROWSER_SCORED_HISTORY_MATCH_H_ 6 #define COMPONENTS_OMNIBOX_BROWSER_SCORED_HISTORY_MATCH_H_
7 7
8 #include <stddef.h> 8 #include <stddef.h>
9 9
10 #include <string> 10 #include <string>
11 #include <vector> 11 #include <vector>
12 12
13 #include "base/gtest_prod_util.h" 13 #include "base/gtest_prod_util.h"
14 #include "base/strings/string16.h" 14 #include "base/strings/string16.h"
15 #include "base/time/time.h" 15 #include "base/time/time.h"
16 #include "components/history/core/browser/history_match.h" 16 #include "components/history/core/browser/history_match.h"
17 #include "components/history/core/browser/history_types.h" 17 #include "components/history/core/browser/history_types.h"
18 #include "components/omnibox/browser/in_memory_url_index_types.h" 18 #include "components/omnibox/browser/in_memory_url_index_types.h"
19 #include "components/omnibox/browser/omnibox_field_trial.h"
19 20
20 class ScoredHistoryMatchTest; 21 class ScoredHistoryMatchTest;
21 22
22 // A HistoryMatch that has a score as well as metrics defining where in the 23 // A HistoryMatch that has a score as well as metrics defining where in the
23 // history item's URL and/or page title matches have occurred. 24 // history item's URL and/or page title matches have occurred.
24 struct ScoredHistoryMatch : public history::HistoryMatch { 25 struct ScoredHistoryMatch : public history::HistoryMatch {
25 // ScoreMaxRelevance maps from an intermediate-score to the maximum 26 // ScoreMaxRelevance maps from an intermediate-score to the maximum
26 // final-relevance score given to a URL for this intermediate score. 27 // final-relevance score given to a URL for this intermediate score.
27 // This is used to store the score ranges of HQP relevance buckets. 28 // This is used to store the score ranges of HQP relevance buckets.
28 // Please see GetFinalRelevancyScore() for details. 29 // Please see GetFinalRelevancyScore() for details.
29 typedef std::pair<double, int> ScoreMaxRelevance; 30 typedef std::pair<double, int> ScoreMaxRelevance;
30 31
31 // Required for STL, we don't use this directly. 32 // Required for STL, we don't use this directly.
32 ScoredHistoryMatch(); 33 ScoredHistoryMatch();
33 ScoredHistoryMatch(const ScoredHistoryMatch& other); 34 ScoredHistoryMatch(const ScoredHistoryMatch& other);
34 35
35 // Initializes the ScoredHistoryMatch with a raw score calculated for the 36 // Initializes the ScoredHistoryMatch with a raw score calculated for the
36 // history item given in |row| with recent visits as indicated in |visits|. It 37 // history item given in |row| with recent visits as indicated in |visits|. It
37 // first determines if the row qualifies by seeing if all of the terms in 38 // first determines if the row qualifies by seeing if all of the terms in
38 // |terms_vector| occur in |row|. If so, calculates a raw score. This raw 39 // |terms_vector| occur in |row|. If so, calculates a raw score. This raw
39 // score is in part determined by whether the matches occur at word 40 // score is in part determined by whether the matches occur at word
40 // boundaries, the locations of which are stored in |word_starts|. For some 41 // boundaries, the locations of which are stored in |word_starts|. For some
41 // terms, it's appropriate to look for the word boundary within the term. For 42 // terms, it's appropriate to look for the word boundary within the term. For
42 // instance, the term ".net" should look for a word boundary at the "n". 43 // instance, the term ".net" should look for a word boundary at the "n".
43 // These offsets (".net" should have an offset of 1) come from 44 // These offsets (".net" should have an offset of 1) come from
44 // |terms_to_word_starts_offsets|. |is_url_bookmarked| indicates whether the 45 // |terms_to_word_starts_offsets|. |is_url_bookmarked| indicates whether the
45 // match's URL is referenced by any bookmarks, which can also affect the raw 46 // match's URL is referenced by any bookmarks, which can also affect the raw
46 // score. The raw score allows the matches to be ordered and can be used to 47 // score. |num_matching_pages| indicates how many URLs in the eligible URL
47 // influence the final score calculated by the client of this index. If the 48 // database match the user's input; it can also affect the raw score. The raw
48 // row does not qualify the raw score will be 0. 49 // score allows the matches to be ordered and can be used to influence the
50 // final score calculated by the client of this index. If the row does not
51 // qualify the raw score will be 0.
49 ScoredHistoryMatch(const history::URLRow& row, 52 ScoredHistoryMatch(const history::URLRow& row,
50 const VisitInfoVector& visits, 53 const VisitInfoVector& visits,
51 const base::string16& lower_string, 54 const base::string16& lower_string,
52 const String16Vector& terms_vector, 55 const String16Vector& terms_vector,
53 const WordStarts& terms_to_word_starts_offsets, 56 const WordStarts& terms_to_word_starts_offsets,
54 const RowWordStarts& word_starts, 57 const RowWordStarts& word_starts,
55 bool is_url_bookmarked, 58 bool is_url_bookmarked,
59 size_t num_matching_pages,
56 base::Time now); 60 base::Time now);
57 61
58 ~ScoredHistoryMatch(); 62 ~ScoredHistoryMatch();
59 63
60 // Compares two matches by score. Functor supporting URLIndexPrivateData's 64 // Compares two matches by score. Functor supporting URLIndexPrivateData's
61 // HistoryItemsForTerms function. Looks at particular fields within 65 // HistoryItemsForTerms function. Looks at particular fields within
62 // url_info to make tie-breaking a bit smarter. 66 // url_info to make tie-breaking a bit smarter.
63 static bool MatchScoreGreater(const ScoredHistoryMatch& m1, 67 static bool MatchScoreGreater(const ScoredHistoryMatch& m1,
64 const ScoredHistoryMatch& m2); 68 const ScoredHistoryMatch& m2);
65 69
(...skipping 20 matching lines...) Expand all
86 // unimportant matches are eliminated by GetTopicalityScore(), called 90 // unimportant matches are eliminated by GetTopicalityScore(), called
87 // during construction.) 91 // during construction.)
88 92
89 // Term matches within the URL. 93 // Term matches within the URL.
90 TermMatches url_matches; 94 TermMatches url_matches;
91 // Term matches within the page title. 95 // Term matches within the page title.
92 TermMatches title_matches; 96 TermMatches title_matches;
93 97
94 private: 98 private:
95 friend class ScoredHistoryMatchTest; 99 friend class ScoredHistoryMatchTest;
100 FRIEND_TEST_ALL_PREFIXES(ScoredHistoryMatchTest, GetDocumentSpecificityScore);
96 FRIEND_TEST_ALL_PREFIXES(ScoredHistoryMatchTest, GetFinalRelevancyScore); 101 FRIEND_TEST_ALL_PREFIXES(ScoredHistoryMatchTest, GetFinalRelevancyScore);
97 FRIEND_TEST_ALL_PREFIXES(ScoredHistoryMatchTest, GetFrequency); 102 FRIEND_TEST_ALL_PREFIXES(ScoredHistoryMatchTest, GetFrequency);
98 FRIEND_TEST_ALL_PREFIXES(ScoredHistoryMatchTest, GetHQPBucketsFromString); 103 FRIEND_TEST_ALL_PREFIXES(ScoredHistoryMatchTest, GetHQPBucketsFromString);
99 FRIEND_TEST_ALL_PREFIXES(ScoredHistoryMatchTest, ScoringBookmarks); 104 FRIEND_TEST_ALL_PREFIXES(ScoredHistoryMatchTest, ScoringBookmarks);
100 FRIEND_TEST_ALL_PREFIXES(ScoredHistoryMatchTest, ScoringScheme); 105 FRIEND_TEST_ALL_PREFIXES(ScoredHistoryMatchTest, ScoringScheme);
101 FRIEND_TEST_ALL_PREFIXES(ScoredHistoryMatchTest, ScoringTLD); 106 FRIEND_TEST_ALL_PREFIXES(ScoredHistoryMatchTest, ScoringTLD);
102 107
103 // Initialize ScoredHistoryMatch statics. Must be called before any other 108 // Initialize ScoredHistoryMatch statics. Must be called before any other
104 // method of ScoredHistoryMatch and before creating any instances. 109 // method of ScoredHistoryMatch and before creating any instances.
105 static void Init(); 110 static void Init();
(...skipping 15 matching lines...) Expand all
121 126
122 // Examines the first |max_visits_to_score_| and returns a score (higher is 127 // Examines the first |max_visits_to_score_| and returns a score (higher is
123 // better) based on the rate of visits, whether the page is bookmarked, and 128 // better) based on the rate of visits, whether the page is bookmarked, and
124 // how often those visits are typed navigations (i.e., explicitly 129 // how often those visits are typed navigations (i.e., explicitly
125 // invoked by the user). |now| is passed in to avoid unnecessarily 130 // invoked by the user). |now| is passed in to avoid unnecessarily
126 // recomputing it frequently. 131 // recomputing it frequently.
127 float GetFrequency(const base::Time& now, 132 float GetFrequency(const base::Time& now,
128 const bool bookmarked, 133 const bool bookmarked,
129 const VisitInfoVector& visits) const; 134 const VisitInfoVector& visits) const;
130 135
131 // Combines the two component scores into a final score that's 136 // Returns a document specificity score based on how many pages matched the
137 // user's input.
138 float GetDocumentSpecificityScore(const size_t num_matching_pages) const;
Peter Kasting 2016/12/06 05:19:30 I'd avoid declaring the parameter here const.
Mark P 2016/12/08 00:21:31 Done.
139
140 // Combines the three component scores into a final score that's
132 // an appropriate value to use as a relevancy score. Scoring buckets are 141 // an appropriate value to use as a relevancy score. Scoring buckets are
133 // specified through |hqp_relevance_buckets|. Please see the function 142 // specified through |hqp_relevance_buckets|. Please see the function
134 // implementation for more details. 143 // implementation for more details.
135 static float GetFinalRelevancyScore( 144 static float GetFinalRelevancyScore(
136 float topicality_score, 145 float topicality_score,
137 float frequency_score, 146 float frequency_score,
147 float specificity_score,
138 const std::vector<ScoreMaxRelevance>& hqp_relevance_buckets); 148 const std::vector<ScoreMaxRelevance>& hqp_relevance_buckets);
139 149
140 // Initializes the HQP experimental params: |hqp_relevance_buckets_| 150 // Initializes the HQP experimental params: |hqp_relevance_buckets_|
141 // to default buckets. If hqp experimental scoring is enabled, it 151 // to default buckets. If hqp experimental scoring is enabled, it
142 // fetches the |hqp_experimental_scoring_enabled_|, |topicality_threshold_| 152 // fetches the |hqp_experimental_scoring_enabled_|, |topicality_threshold_|
143 // and |hqp_relevance_buckets_| from omnibox field trials. 153 // and |hqp_relevance_buckets_| from omnibox field trials.
144 static void InitHQPExperimentalParams(); 154 static void InitHQPExperimentalParams();
145 155
146 // Helper function to parse the string containing the scoring buckets. 156 // Helper function to parse the string containing the scoring buckets.
147 // For example, 157 // For example,
(...skipping 16 matching lines...) Expand all
164 static float typed_value_; 174 static float typed_value_;
165 175
166 // True if we should fix a bug in frequency scoring relating to how we 176 // True if we should fix a bug in frequency scoring relating to how we
167 // extrapolate frecency when the URL has been visited few times. 177 // extrapolate frecency when the URL has been visited few times.
168 static bool fix_few_visits_bug_; 178 static bool fix_few_visits_bug_;
169 179
170 // Determines whether GetFrequency() returns a score based on the weighted 180 // Determines whether GetFrequency() returns a score based on the weighted
171 // sum of visit scores instead of the weighted average. 181 // sum of visit scores instead of the weighted average.
172 static bool frequency_uses_sum_; 182 static bool frequency_uses_sum_;
173 183
184 // A mapping from the number of matching pages to their associated document
185 // specificity scores. See omnibox_field_trial.h for more details.
186 static OmniboxFieldTrial::NumMatchesScores*
187 num_matches_to_document_specificity_score_;
188
174 // The maximum number of recent visits to examine in GetFrequency(). 189 // The maximum number of recent visits to examine in GetFrequency().
175 static size_t max_visits_to_score_; 190 static size_t max_visits_to_score_;
176 191
177 // If true, we allow input terms to match in the TLD (e.g., ".com"). 192 // If true, we allow input terms to match in the TLD (e.g., ".com").
178 static bool allow_tld_matches_; 193 static bool allow_tld_matches_;
179 194
180 // If true, we allow input terms to match in the scheme (e.g., "http://"). 195 // If true, we allow input terms to match in the scheme (e.g., "http://").
181 static bool allow_scheme_matches_; 196 static bool allow_scheme_matches_;
182 197
183 // The number of title words examined when computing topicality scores. 198 // The number of title words examined when computing topicality scores.
(...skipping 13 matching lines...) Expand all
197 static char hqp_relevance_buckets_str_[]; 212 static char hqp_relevance_buckets_str_[];
198 213
199 // |hqp_relevance_buckets_| gives mapping from (topicality*frequency) 214 // |hqp_relevance_buckets_| gives mapping from (topicality*frequency)
200 // to the final relevance scoring. Please see GetFinalRelevancyScore() 215 // to the final relevance scoring. Please see GetFinalRelevancyScore()
201 // for more details and scoring method. 216 // for more details and scoring method.
202 static std::vector<ScoreMaxRelevance>* hqp_relevance_buckets_; 217 static std::vector<ScoreMaxRelevance>* hqp_relevance_buckets_;
203 }; 218 };
204 typedef std::vector<ScoredHistoryMatch> ScoredHistoryMatches; 219 typedef std::vector<ScoredHistoryMatch> ScoredHistoryMatches;
205 220
206 #endif // COMPONENTS_OMNIBOX_BROWSER_SCORED_HISTORY_MATCH_H_ 221 #endif // COMPONENTS_OMNIBOX_BROWSER_SCORED_HISTORY_MATCH_H_
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698