OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef COMPONENTS_OMNIBOX_BROWSER_SCORED_HISTORY_MATCH_H_ | 5 #ifndef COMPONENTS_OMNIBOX_BROWSER_SCORED_HISTORY_MATCH_H_ |
6 #define COMPONENTS_OMNIBOX_BROWSER_SCORED_HISTORY_MATCH_H_ | 6 #define COMPONENTS_OMNIBOX_BROWSER_SCORED_HISTORY_MATCH_H_ |
7 | 7 |
8 #include <stddef.h> | 8 #include <stddef.h> |
9 | 9 |
10 #include <string> | 10 #include <string> |
11 #include <vector> | 11 #include <vector> |
12 | 12 |
13 #include "base/gtest_prod_util.h" | 13 #include "base/gtest_prod_util.h" |
14 #include "base/strings/string16.h" | 14 #include "base/strings/string16.h" |
15 #include "base/time/time.h" | 15 #include "base/time/time.h" |
16 #include "components/history/core/browser/history_match.h" | 16 #include "components/history/core/browser/history_match.h" |
17 #include "components/history/core/browser/history_types.h" | 17 #include "components/history/core/browser/history_types.h" |
18 #include "components/omnibox/browser/in_memory_url_index_types.h" | 18 #include "components/omnibox/browser/in_memory_url_index_types.h" |
19 #include "components/omnibox/browser/omnibox_field_trial.h" | |
19 | 20 |
20 class ScoredHistoryMatchTest; | 21 class ScoredHistoryMatchTest; |
21 | 22 |
22 // A HistoryMatch that has a score as well as metrics defining where in the | 23 // A HistoryMatch that has a score as well as metrics defining where in the |
23 // history item's URL and/or page title matches have occurred. | 24 // history item's URL and/or page title matches have occurred. |
24 struct ScoredHistoryMatch : public history::HistoryMatch { | 25 struct ScoredHistoryMatch : public history::HistoryMatch { |
25 // ScoreMaxRelevance maps from an intermediate-score to the maximum | 26 // ScoreMaxRelevance maps from an intermediate-score to the maximum |
26 // final-relevance score given to a URL for this intermediate score. | 27 // final-relevance score given to a URL for this intermediate score. |
27 // This is used to store the score ranges of HQP relevance buckets. | 28 // This is used to store the score ranges of HQP relevance buckets. |
28 // Please see GetFinalRelevancyScore() for details. | 29 // Please see GetFinalRelevancyScore() for details. |
29 typedef std::pair<double, int> ScoreMaxRelevance; | 30 typedef std::pair<double, int> ScoreMaxRelevance; |
30 | 31 |
31 // Required for STL, we don't use this directly. | 32 // Required for STL, we don't use this directly. |
32 ScoredHistoryMatch(); | 33 ScoredHistoryMatch(); |
33 ScoredHistoryMatch(const ScoredHistoryMatch& other); | 34 ScoredHistoryMatch(const ScoredHistoryMatch& other); |
34 | 35 |
35 // Initializes the ScoredHistoryMatch with a raw score calculated for the | 36 // Initializes the ScoredHistoryMatch with a raw score calculated for the |
36 // history item given in |row| with recent visits as indicated in |visits|. It | 37 // history item given in |row| with recent visits as indicated in |visits|. It |
37 // first determines if the row qualifies by seeing if all of the terms in | 38 // first determines if the row qualifies by seeing if all of the terms in |
38 // |terms_vector| occur in |row|. If so, calculates a raw score. This raw | 39 // |terms_vector| occur in |row|. If so, calculates a raw score. This raw |
39 // score is in part determined by whether the matches occur at word | 40 // score is in part determined by whether the matches occur at word |
40 // boundaries, the locations of which are stored in |word_starts|. For some | 41 // boundaries, the locations of which are stored in |word_starts|. For some |
41 // terms, it's appropriate to look for the word boundary within the term. For | 42 // terms, it's appropriate to look for the word boundary within the term. For |
42 // instance, the term ".net" should look for a word boundary at the "n". | 43 // instance, the term ".net" should look for a word boundary at the "n". |
43 // These offsets (".net" should have an offset of 1) come from | 44 // These offsets (".net" should have an offset of 1) come from |
44 // |terms_to_word_starts_offsets|. |is_url_bookmarked| indicates whether the | 45 // |terms_to_word_starts_offsets|. |is_url_bookmarked| indicates whether the |
45 // match's URL is referenced by any bookmarks, which can also affect the raw | 46 // match's URL is referenced by any bookmarks, which can also affect the raw |
46 // score. The raw score allows the matches to be ordered and can be used to | 47 // score. |num_matching_pages| indicates how many URLs in the eligible URL |
47 // influence the final score calculated by the client of this index. If the | 48 // database match the user's input; it can also affect the raw score. The raw |
48 // row does not qualify the raw score will be 0. | 49 // score allows the matches to be ordered and can be used to influence the |
50 // final score calculated by the client of this index. If the row does not | |
51 // qualify the raw score will be 0. | |
49 ScoredHistoryMatch(const history::URLRow& row, | 52 ScoredHistoryMatch(const history::URLRow& row, |
50 const VisitInfoVector& visits, | 53 const VisitInfoVector& visits, |
51 const base::string16& lower_string, | 54 const base::string16& lower_string, |
52 const String16Vector& terms_vector, | 55 const String16Vector& terms_vector, |
53 const WordStarts& terms_to_word_starts_offsets, | 56 const WordStarts& terms_to_word_starts_offsets, |
54 const RowWordStarts& word_starts, | 57 const RowWordStarts& word_starts, |
55 bool is_url_bookmarked, | 58 bool is_url_bookmarked, |
59 size_t num_matching_pages, | |
Peter Kasting
2016/12/01 07:07:53
Nit: There are a lot of args here and the unittest
Mark P
2016/12/04 01:06:42
Almost all of these args are requires and need rea
| |
56 base::Time now); | 60 base::Time now); |
57 | 61 |
58 ~ScoredHistoryMatch(); | 62 ~ScoredHistoryMatch(); |
59 | 63 |
60 // Compares two matches by score. Functor supporting URLIndexPrivateData's | 64 // Compares two matches by score. Functor supporting URLIndexPrivateData's |
61 // HistoryItemsForTerms function. Looks at particular fields within | 65 // HistoryItemsForTerms function. Looks at particular fields within |
62 // url_info to make tie-breaking a bit smarter. | 66 // url_info to make tie-breaking a bit smarter. |
63 static bool MatchScoreGreater(const ScoredHistoryMatch& m1, | 67 static bool MatchScoreGreater(const ScoredHistoryMatch& m1, |
64 const ScoredHistoryMatch& m2); | 68 const ScoredHistoryMatch& m2); |
65 | 69 |
(...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
116 const RowWordStarts& word_starts); | 120 const RowWordStarts& word_starts); |
117 | 121 |
118 // Returns a recency score based on |last_visit_days_ago|, which is | 122 // Returns a recency score based on |last_visit_days_ago|, which is |
119 // how many days ago the page was last visited. | 123 // how many days ago the page was last visited. |
120 float GetRecencyScore(int last_visit_days_ago) const; | 124 float GetRecencyScore(int last_visit_days_ago) const; |
121 | 125 |
122 // Examines the first |max_visits_to_score_| and returns a score (higher is | 126 // Examines the first |max_visits_to_score_| and returns a score (higher is |
123 // better) based on the rate of visits, whether the page is bookmarked, and | 127 // better) based on the rate of visits, whether the page is bookmarked, and |
124 // how often those visits are typed navigations (i.e., explicitly | 128 // how often those visits are typed navigations (i.e., explicitly |
125 // invoked by the user). |now| is passed in to avoid unnecessarily | 129 // invoked by the user). |now| is passed in to avoid unnecessarily |
126 // recomputing it frequently. | 130 // recomputing it frequently. |num_matching_pages| is used to alter the |
131 // frequency score depending on how many pages matched the user's input. | |
127 float GetFrequency(const base::Time& now, | 132 float GetFrequency(const base::Time& now, |
128 const bool bookmarked, | 133 const bool bookmarked, |
129 const VisitInfoVector& visits) const; | 134 const VisitInfoVector& visits, |
135 const size_t num_matching_pages) const; | |
130 | 136 |
131 // Combines the two component scores into a final score that's | 137 // Combines the two component scores into a final score that's |
132 // an appropriate value to use as a relevancy score. Scoring buckets are | 138 // an appropriate value to use as a relevancy score. Scoring buckets are |
133 // specified through |hqp_relevance_buckets|. Please see the function | 139 // specified through |hqp_relevance_buckets|. Please see the function |
134 // implementation for more details. | 140 // implementation for more details. |
135 static float GetFinalRelevancyScore( | 141 static float GetFinalRelevancyScore( |
136 float topicality_score, | 142 float topicality_score, |
137 float frequency_score, | 143 float frequency_score, |
138 const std::vector<ScoreMaxRelevance>& hqp_relevance_buckets); | 144 const std::vector<ScoreMaxRelevance>& hqp_relevance_buckets); |
139 | 145 |
(...skipping 24 matching lines...) Expand all Loading... | |
164 static float typed_value_; | 170 static float typed_value_; |
165 | 171 |
166 // True if we should fix a bug in frequency scoring relating to how we | 172 // True if we should fix a bug in frequency scoring relating to how we |
167 // extrapolate frecency when the URL has been visited few times. | 173 // extrapolate frecency when the URL has been visited few times. |
168 static bool fix_few_visits_bug_; | 174 static bool fix_few_visits_bug_; |
169 | 175 |
170 // Determines whether GetFrequency() returns a score based on the weighted | 176 // Determines whether GetFrequency() returns a score based on the weighted |
171 // sum of visit scores instead of the weighted average. | 177 // sum of visit scores instead of the weighted average. |
172 static bool frequency_uses_sum_; | 178 static bool frequency_uses_sum_; |
173 | 179 |
180 // A mapping from the number of matching pages to the multiplier by which | |
181 // all those matching pages' frequency scores should be multiplied. | |
182 // Omitted values are assumed to have multipliers of 1.0. | |
Peter Kasting
2016/12/01 07:07:53
Nit: I would omit this last sentence and possibly
Mark P
2016/12/04 01:06:42
Did both. Now with the restructuring of the data
| |
183 static OmniboxFieldTrial::NumMatchesMultipliers* | |
184 num_matches_to_frequency_multiplier_; | |
185 | |
174 // The maximum number of recent visits to examine in GetFrequency(). | 186 // The maximum number of recent visits to examine in GetFrequency(). |
175 static size_t max_visits_to_score_; | 187 static size_t max_visits_to_score_; |
176 | 188 |
177 // If true, we allow input terms to match in the TLD (e.g., ".com"). | 189 // If true, we allow input terms to match in the TLD (e.g., ".com"). |
178 static bool allow_tld_matches_; | 190 static bool allow_tld_matches_; |
179 | 191 |
180 // If true, we allow input terms to match in the scheme (e.g., "http://"). | 192 // If true, we allow input terms to match in the scheme (e.g., "http://"). |
181 static bool allow_scheme_matches_; | 193 static bool allow_scheme_matches_; |
182 | 194 |
183 // The number of title words examined when computing topicality scores. | 195 // The number of title words examined when computing topicality scores. |
(...skipping 13 matching lines...) Expand all Loading... | |
197 static char hqp_relevance_buckets_str_[]; | 209 static char hqp_relevance_buckets_str_[]; |
198 | 210 |
199 // |hqp_relevance_buckets_| gives mapping from (topicality*frequency) | 211 // |hqp_relevance_buckets_| gives mapping from (topicality*frequency) |
200 // to the final relevance scoring. Please see GetFinalRelevancyScore() | 212 // to the final relevance scoring. Please see GetFinalRelevancyScore() |
201 // for more details and scoring method. | 213 // for more details and scoring method. |
202 static std::vector<ScoreMaxRelevance>* hqp_relevance_buckets_; | 214 static std::vector<ScoreMaxRelevance>* hqp_relevance_buckets_; |
203 }; | 215 }; |
204 typedef std::vector<ScoredHistoryMatch> ScoredHistoryMatches; | 216 typedef std::vector<ScoredHistoryMatch> ScoredHistoryMatches; |
205 | 217 |
206 #endif // COMPONENTS_OMNIBOX_BROWSER_SCORED_HISTORY_MATCH_H_ | 218 #endif // COMPONENTS_OMNIBOX_BROWSER_SCORED_HISTORY_MATCH_H_ |
OLD | NEW |