OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef CHROME_BROWSER_HISTORY_SCORED_HISTORY_MATCH_H_ | 5 #ifndef CHROME_BROWSER_HISTORY_SCORED_HISTORY_MATCH_H_ |
6 #define CHROME_BROWSER_HISTORY_SCORED_HISTORY_MATCH_H_ | 6 #define CHROME_BROWSER_HISTORY_SCORED_HISTORY_MATCH_H_ |
7 | 7 |
8 #include <map> | 8 #include <map> |
9 #include <set> | 9 #include <set> |
10 #include <vector> | 10 #include <vector> |
(...skipping 11 matching lines...) Expand all Loading... | |
22 | 22 |
23 // A HistoryMatch that has a score as well as metrics defining where in the | 23 // A HistoryMatch that has a score as well as metrics defining where in the |
24 // history item's URL and/or page title matches have occurred. | 24 // history item's URL and/or page title matches have occurred. |
25 class ScoredHistoryMatch : public history::HistoryMatch { | 25 class ScoredHistoryMatch : public history::HistoryMatch { |
26 public: | 26 public: |
27 // The maximum number of recent visits to examine in GetFrequency(). | 27 // The maximum number of recent visits to examine in GetFrequency(). |
28 // Public so url_index_private_data.cc knows how many visits it is | 28 // Public so url_index_private_data.cc knows how many visits it is |
29 // expected to deliver (at minimum) to this class. | 29 // expected to deliver (at minimum) to this class. |
30 static const size_t kMaxVisitsToScore; | 30 static const size_t kMaxVisitsToScore; |
31 | 31 |
32 // ScoreMaxRelevance maps from intermediate-score to the final-relevance | |
Mark P
2015/02/14 01:27:14
from -> from an
to the -> to the maximum
Ashok vardhan
2015/02/17 01:23:53
Done.
| |
33 // score given to URL. This is used to store the score ranges of HQP relevance | |
Mark P
2015/02/14 01:27:14
to URL -> to a URL for this intermediate score.
Ashok vardhan
2015/02/17 01:23:53
Done.
| |
34 // buckets. Please see GetFinalRelevancyScore() for details. | |
35 typedef std::pair<double, int> ScoreMaxRelevance; | |
36 | |
32 ScoredHistoryMatch(); // Required by STL. | 37 ScoredHistoryMatch(); // Required by STL. |
33 | 38 |
34 // Creates a new match with a raw score calculated for the history item | 39 // Creates a new match with a raw score calculated for the history item |
35 // given in |row| with recent visits as indicated in |visits|. First | 40 // given in |row| with recent visits as indicated in |visits|. First |
36 // determines if the row qualifies by seeing if all of the terms in | 41 // determines if the row qualifies by seeing if all of the terms in |
37 // |terms_vector| occur in |row|. If so, calculates a raw score. This raw | 42 // |terms_vector| occur in |row|. If so, calculates a raw score. This raw |
38 // score is in part determined by whether the matches occur at word | 43 // score is in part determined by whether the matches occur at word |
39 // boundaries, the locations of which are stored in |word_starts|. For some | 44 // boundaries, the locations of which are stored in |word_starts|. For some |
40 // terms, it's appropriate to look for the word boundary within the term. | 45 // terms, it's appropriate to look for the word boundary within the term. |
41 // For instance, the term ".net" should look for a word boundary at the "n". | 46 // For instance, the term ".net" should look for a word boundary at the "n". |
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
76 // end_pos == string::npos is treated as end_pos = length of string. | 81 // end_pos == string::npos is treated as end_pos = length of string. |
77 static TermMatches FilterTermMatchesByWordStarts( | 82 static TermMatches FilterTermMatchesByWordStarts( |
78 const TermMatches& term_matches, | 83 const TermMatches& term_matches, |
79 const WordStarts& terms_to_word_starts_offsets, | 84 const WordStarts& terms_to_word_starts_offsets, |
80 const WordStarts& word_starts, | 85 const WordStarts& word_starts, |
81 size_t start_pos, | 86 size_t start_pos, |
82 size_t end_pos); | 87 size_t end_pos); |
83 | 88 |
84 private: | 89 private: |
85 friend class ScoredHistoryMatchTest; | 90 friend class ScoredHistoryMatchTest; |
91 FRIEND_TEST_ALL_PREFIXES(ScoredHistoryMatchTest, GetFinalRelevancyScore); | |
86 FRIEND_TEST_ALL_PREFIXES(ScoredHistoryMatchTest, ScoringBookmarks); | 92 FRIEND_TEST_ALL_PREFIXES(ScoredHistoryMatchTest, ScoringBookmarks); |
87 FRIEND_TEST_ALL_PREFIXES(ScoredHistoryMatchTest, ScoringDiscountFrecency); | 93 FRIEND_TEST_ALL_PREFIXES(ScoredHistoryMatchTest, ScoringDiscountFrecency); |
88 FRIEND_TEST_ALL_PREFIXES(ScoredHistoryMatchTest, ScoringScheme); | 94 FRIEND_TEST_ALL_PREFIXES(ScoredHistoryMatchTest, ScoringScheme); |
89 FRIEND_TEST_ALL_PREFIXES(ScoredHistoryMatchTest, ScoringTLD); | 95 FRIEND_TEST_ALL_PREFIXES(ScoredHistoryMatchTest, ScoringTLD); |
90 | 96 |
91 // The number of days of recency scores to precompute. | 97 // The number of days of recency scores to precompute. |
92 static const int kDaysToPrecomputeRecencyScoresFor; | 98 static const int kDaysToPrecomputeRecencyScoresFor; |
93 | 99 |
94 // The number of raw term score buckets to use; raw term scores | 100 // The number of raw term score buckets to use; raw term scores |
95 // greater than this are capped at the score of the largest bucket. | 101 // greater than this are capped at the score of the largest bucket. |
(...skipping 24 matching lines...) Expand all Loading... | |
120 // Examines the first kMaxVisitsToScore and returns a score (higher is | 126 // Examines the first kMaxVisitsToScore and returns a score (higher is |
121 // better) based on the rate of visits, whether the page is bookmarked, and | 127 // better) based on the rate of visits, whether the page is bookmarked, and |
122 // how often those visits are typed navigations (i.e., explicitly | 128 // how often those visits are typed navigations (i.e., explicitly |
123 // invoked by the user). |now| is passed in to avoid unnecessarily | 129 // invoked by the user). |now| is passed in to avoid unnecessarily |
124 // recomputing it frequently. | 130 // recomputing it frequently. |
125 static float GetFrequency(const base::Time& now, | 131 static float GetFrequency(const base::Time& now, |
126 const bool bookmarked, | 132 const bool bookmarked, |
127 const VisitInfoVector& visits); | 133 const VisitInfoVector& visits); |
128 | 134 |
129 // Combines the two component scores into a final score that's | 135 // Combines the two component scores into a final score that's |
130 // an appropriate value to use as a relevancy score. | 136 // an appropriate value to use as a relevancy score. Scoring buckets are |
137 // specified through |hqp_relevance_buckets|. Please see function | |
Mark P
2015/02/14 01:27:14
see -> see the
Ashok vardhan
2015/02/17 01:23:52
Done.
| |
138 // implementation for more details. | |
131 static float GetFinalRelevancyScore( | 139 static float GetFinalRelevancyScore( |
132 float topicality_score, | 140 float topicality_score, |
133 float frequency_score); | 141 float frequency_score, |
142 const std::vector<ScoreMaxRelevance>& hqp_relevance_buckets); | |
143 | |
144 // Initializes the HQP experimental params. | |
Mark P
2015/02/14 01:27:14
This comment is a verb, implying it should go next
Ashok vardhan
2015/02/17 01:23:53
Done.
| |
145 // If the experimental scoring is enabled, it sets: | |
Mark P
2015/02/14 01:27:14
By the way, this comment is wrong as written. In
| |
146 // | |
147 // 1. hqp_experimental to true. Default value is set to false. | |
148 // | |
149 // 2. It initializes the topicality_threshold_, and hqp_scoring from the | |
150 // finch experiment params. | |
151 // | |
152 // 3. topicality_threshold_ is used to control the topicality scoring. | |
153 // If topicality_threshold > 0, then URLs with topicality score < threshold, | |
154 // are given score 0. It is initialized to -1; | |
155 // | |
156 // 4. hqp_relevance_buckets_, buckets that give the mapping from | |
157 // (topicality*frequency) to the final relevance scoring. | |
158 // Please see GetFinalRelevancyScore() for more details and scoring method. | |
159 static bool hqp_experimental_scoring_enabled_; | |
160 static float topicality_threshold_; | |
161 static std::vector<ScoreMaxRelevance>* hqp_relevance_buckets_; | |
162 | |
163 static void InitializeHQPExperimentalParams(); | |
134 | 164 |
135 // Sets |also_do_hup_like_scoring_|, | 165 // Sets |also_do_hup_like_scoring_|, |
136 // |max_assigned_score_for_non_inlineable_matches_|, |bookmark_value_|, | 166 // |max_assigned_score_for_non_inlineable_matches_|, |bookmark_value_|, |
137 // |allow_tld_matches_|, and |allow_scheme_matches_| based on the field | 167 // |allow_tld_matches_|, and |allow_scheme_matches_| based on the field |
138 // trial state. | 168 // trial state. |
139 static void Init(); | 169 static void Init(); |
140 | 170 |
141 // An interim score taking into consideration location and completeness | 171 // An interim score taking into consideration location and completeness |
142 // of the match. | 172 // of the match. |
143 int raw_score_; | 173 int raw_score_; |
(...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
199 // matches) because if a non-inlineable match comes first then all matches | 229 // matches) because if a non-inlineable match comes first then all matches |
200 // will get demoted later in HistoryQuickProvider to non-inlineable scores. | 230 // will get demoted later in HistoryQuickProvider to non-inlineable scores. |
201 // Set to -1 to indicate no maximum score. | 231 // Set to -1 to indicate no maximum score. |
202 static int max_assigned_score_for_non_inlineable_matches_; | 232 static int max_assigned_score_for_non_inlineable_matches_; |
203 }; | 233 }; |
204 typedef std::vector<ScoredHistoryMatch> ScoredHistoryMatches; | 234 typedef std::vector<ScoredHistoryMatch> ScoredHistoryMatches; |
205 | 235 |
206 } // namespace history | 236 } // namespace history |
207 | 237 |
208 #endif // CHROME_BROWSER_HISTORY_SCORED_HISTORY_MATCH_H_ | 238 #endif // CHROME_BROWSER_HISTORY_SCORED_HISTORY_MATCH_H_ |
OLD | NEW |