Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #ifndef COMPONENTS_OMNIBOX_BROWSER_SCORED_HISTORY_MATCH_H_ | 5 #ifndef COMPONENTS_OMNIBOX_BROWSER_SCORED_HISTORY_MATCH_H_ |
| 6 #define COMPONENTS_OMNIBOX_BROWSER_SCORED_HISTORY_MATCH_H_ | 6 #define COMPONENTS_OMNIBOX_BROWSER_SCORED_HISTORY_MATCH_H_ |
| 7 | 7 |
| 8 #include <stddef.h> | 8 #include <stddef.h> |
| 9 | 9 |
| 10 #include <string> | 10 #include <string> |
| 11 #include <vector> | 11 #include <vector> |
| 12 | 12 |
| 13 #include "base/gtest_prod_util.h" | 13 #include "base/gtest_prod_util.h" |
| 14 #include "base/strings/string16.h" | 14 #include "base/strings/string16.h" |
| 15 #include "base/time/time.h" | 15 #include "base/time/time.h" |
| 16 #include "components/history/core/browser/history_match.h" | 16 #include "components/history/core/browser/history_match.h" |
| 17 #include "components/history/core/browser/history_types.h" | 17 #include "components/history/core/browser/history_types.h" |
| 18 #include "components/omnibox/browser/in_memory_url_index_types.h" | 18 #include "components/omnibox/browser/in_memory_url_index_types.h" |
| 19 #include "components/omnibox/browser/omnibox_field_trial.h" | |
| 19 | 20 |
| 20 class ScoredHistoryMatchTest; | 21 class ScoredHistoryMatchTest; |
| 21 | 22 |
| 22 // A HistoryMatch that has a score as well as metrics defining where in the | 23 // A HistoryMatch that has a score as well as metrics defining where in the |
| 23 // history item's URL and/or page title matches have occurred. | 24 // history item's URL and/or page title matches have occurred. |
| 24 struct ScoredHistoryMatch : public history::HistoryMatch { | 25 struct ScoredHistoryMatch : public history::HistoryMatch { |
| 25 // ScoreMaxRelevance maps from an intermediate-score to the maximum | 26 // ScoreMaxRelevance maps from an intermediate-score to the maximum |
| 26 // final-relevance score given to a URL for this intermediate score. | 27 // final-relevance score given to a URL for this intermediate score. |
| 27 // This is used to store the score ranges of HQP relevance buckets. | 28 // This is used to store the score ranges of HQP relevance buckets. |
| 28 // Please see GetFinalRelevancyScore() for details. | 29 // Please see GetFinalRelevancyScore() for details. |
| 29 typedef std::pair<double, int> ScoreMaxRelevance; | 30 typedef std::pair<double, int> ScoreMaxRelevance; |
| 30 | 31 |
| 31 // Required for STL, we don't use this directly. | 32 // Required for STL, we don't use this directly. |
| 32 ScoredHistoryMatch(); | 33 ScoredHistoryMatch(); |
| 33 ScoredHistoryMatch(const ScoredHistoryMatch& other); | 34 ScoredHistoryMatch(const ScoredHistoryMatch& other); |
| 34 | 35 |
| 35 // Initializes the ScoredHistoryMatch with a raw score calculated for the | 36 // Initializes the ScoredHistoryMatch with a raw score calculated for the |
| 36 // history item given in |row| with recent visits as indicated in |visits|. It | 37 // history item given in |row| with recent visits as indicated in |visits|. It |
| 37 // first determines if the row qualifies by seeing if all of the terms in | 38 // first determines if the row qualifies by seeing if all of the terms in |
| 38 // |terms_vector| occur in |row|. If so, calculates a raw score. This raw | 39 // |terms_vector| occur in |row|. If so, calculates a raw score. This raw |
| 39 // score is in part determined by whether the matches occur at word | 40 // score is in part determined by whether the matches occur at word |
| 40 // boundaries, the locations of which are stored in |word_starts|. For some | 41 // boundaries, the locations of which are stored in |word_starts|. For some |
| 41 // terms, it's appropriate to look for the word boundary within the term. For | 42 // terms, it's appropriate to look for the word boundary within the term. For |
| 42 // instance, the term ".net" should look for a word boundary at the "n". | 43 // instance, the term ".net" should look for a word boundary at the "n". |
| 43 // These offsets (".net" should have an offset of 1) come from | 44 // These offsets (".net" should have an offset of 1) come from |
| 44 // |terms_to_word_starts_offsets|. |is_url_bookmarked| indicates whether the | 45 // |terms_to_word_starts_offsets|. |is_url_bookmarked| indicates whether the |
| 45 // match's URL is referenced by any bookmarks, which can also affect the raw | 46 // match's URL is referenced by any bookmarks, which can also affect the raw |
| 46 // score. The raw score allows the matches to be ordered and can be used to | 47 // score. |num_matching_pages| indicates how many URLs in the eligible URL |
| 47 // influence the final score calculated by the client of this index. If the | 48 // database match the user's input; it can also affect the raw score. The raw |
| 48 // row does not qualify the raw score will be 0. | 49 // score allows the matches to be ordered and can be used to influence the |
| 50 // final score calculated by the client of this index. If the row does not | |
| 51 // qualify the raw score will be 0. | |
| 49 ScoredHistoryMatch(const history::URLRow& row, | 52 ScoredHistoryMatch(const history::URLRow& row, |
| 50 const VisitInfoVector& visits, | 53 const VisitInfoVector& visits, |
| 51 const base::string16& lower_string, | 54 const base::string16& lower_string, |
| 52 const String16Vector& terms_vector, | 55 const String16Vector& terms_vector, |
| 53 const WordStarts& terms_to_word_starts_offsets, | 56 const WordStarts& terms_to_word_starts_offsets, |
| 54 const RowWordStarts& word_starts, | 57 const RowWordStarts& word_starts, |
| 55 bool is_url_bookmarked, | 58 bool is_url_bookmarked, |
| 59 size_t num_matching_pages, | |
| 56 base::Time now); | 60 base::Time now); |
| 57 | 61 |
| 58 ~ScoredHistoryMatch(); | 62 ~ScoredHistoryMatch(); |
| 59 | 63 |
| 60 // Compares two matches by score. Functor supporting URLIndexPrivateData's | 64 // Compares two matches by score. Functor supporting URLIndexPrivateData's |
| 61 // HistoryItemsForTerms function. Looks at particular fields within | 65 // HistoryItemsForTerms function. Looks at particular fields within |
| 62 // url_info to make tie-breaking a bit smarter. | 66 // url_info to make tie-breaking a bit smarter. |
| 63 static bool MatchScoreGreater(const ScoredHistoryMatch& m1, | 67 static bool MatchScoreGreater(const ScoredHistoryMatch& m1, |
| 64 const ScoredHistoryMatch& m2); | 68 const ScoredHistoryMatch& m2); |
| 65 | 69 |
| (...skipping 20 matching lines...) Expand all Loading... | |
| 86 // unimportant matches are eliminated by GetTopicalityScore(), called | 90 // unimportant matches are eliminated by GetTopicalityScore(), called |
| 87 // during construction.) | 91 // during construction.) |
| 88 | 92 |
| 89 // Term matches within the URL. | 93 // Term matches within the URL. |
| 90 TermMatches url_matches; | 94 TermMatches url_matches; |
| 91 // Term matches within the page title. | 95 // Term matches within the page title. |
| 92 TermMatches title_matches; | 96 TermMatches title_matches; |
| 93 | 97 |
| 94 private: | 98 private: |
| 95 friend class ScoredHistoryMatchTest; | 99 friend class ScoredHistoryMatchTest; |
| 100 FRIEND_TEST_ALL_PREFIXES(ScoredHistoryMatchTest, GetDocumentSpecificityScore); | |
| 96 FRIEND_TEST_ALL_PREFIXES(ScoredHistoryMatchTest, GetFinalRelevancyScore); | 101 FRIEND_TEST_ALL_PREFIXES(ScoredHistoryMatchTest, GetFinalRelevancyScore); |
| 97 FRIEND_TEST_ALL_PREFIXES(ScoredHistoryMatchTest, GetFrequency); | 102 FRIEND_TEST_ALL_PREFIXES(ScoredHistoryMatchTest, GetFrequency); |
| 98 FRIEND_TEST_ALL_PREFIXES(ScoredHistoryMatchTest, GetHQPBucketsFromString); | 103 FRIEND_TEST_ALL_PREFIXES(ScoredHistoryMatchTest, GetHQPBucketsFromString); |
| 99 FRIEND_TEST_ALL_PREFIXES(ScoredHistoryMatchTest, ScoringBookmarks); | 104 FRIEND_TEST_ALL_PREFIXES(ScoredHistoryMatchTest, ScoringBookmarks); |
| 100 FRIEND_TEST_ALL_PREFIXES(ScoredHistoryMatchTest, ScoringScheme); | 105 FRIEND_TEST_ALL_PREFIXES(ScoredHistoryMatchTest, ScoringScheme); |
| 101 FRIEND_TEST_ALL_PREFIXES(ScoredHistoryMatchTest, ScoringTLD); | 106 FRIEND_TEST_ALL_PREFIXES(ScoredHistoryMatchTest, ScoringTLD); |
| 102 | 107 |
| 103 // Initialize ScoredHistoryMatch statics. Must be called before any other | 108 // Initialize ScoredHistoryMatch statics. Must be called before any other |
| 104 // method of ScoredHistoryMatch and before creating any instances. | 109 // method of ScoredHistoryMatch and before creating any instances. |
| 105 static void Init(); | 110 static void Init(); |
| (...skipping 15 matching lines...) Expand all Loading... | |
| 121 | 126 |
| 122 // Examines the first |max_visits_to_score_| and returns a score (higher is | 127 // Examines the first |max_visits_to_score_| and returns a score (higher is |
| 123 // better) based on the rate of visits, whether the page is bookmarked, and | 128 // better) based on the rate of visits, whether the page is bookmarked, and |
| 124 // how often those visits are typed navigations (i.e., explicitly | 129 // how often those visits are typed navigations (i.e., explicitly |
| 125 // invoked by the user). |now| is passed in to avoid unnecessarily | 130 // invoked by the user). |now| is passed in to avoid unnecessarily |
| 126 // recomputing it frequently. | 131 // recomputing it frequently. |
| 127 float GetFrequency(const base::Time& now, | 132 float GetFrequency(const base::Time& now, |
| 128 const bool bookmarked, | 133 const bool bookmarked, |
| 129 const VisitInfoVector& visits) const; | 134 const VisitInfoVector& visits) const; |
| 130 | 135 |
| 131 // Combines the two component scores into a final score that's | 136 // Returns a document specificity score based on how many pages matched the |
| 137 // user's input. | |
| 138 float GetDocumentSpecificityScore(const size_t num_matching_pages) const; | |
|
Peter Kasting
2016/12/06 05:19:30
I'd avoid declaring the parameter here const.
Mark P
2016/12/08 00:21:31
Done.
| |
| 139 | |
| 140 // Combines the three component scores into a final score that's | |
| 132 // an appropriate value to use as a relevancy score. Scoring buckets are | 141 // an appropriate value to use as a relevancy score. Scoring buckets are |
| 133 // specified through |hqp_relevance_buckets|. Please see the function | 142 // specified through |hqp_relevance_buckets|. Please see the function |
| 134 // implementation for more details. | 143 // implementation for more details. |
| 135 static float GetFinalRelevancyScore( | 144 static float GetFinalRelevancyScore( |
| 136 float topicality_score, | 145 float topicality_score, |
| 137 float frequency_score, | 146 float frequency_score, |
| 147 float specificity_score, | |
| 138 const std::vector<ScoreMaxRelevance>& hqp_relevance_buckets); | 148 const std::vector<ScoreMaxRelevance>& hqp_relevance_buckets); |
| 139 | 149 |
| 140 // Initializes the HQP experimental params: |hqp_relevance_buckets_| | 150 // Initializes the HQP experimental params: |hqp_relevance_buckets_| |
| 141 // to default buckets. If hqp experimental scoring is enabled, it | 151 // to default buckets. If hqp experimental scoring is enabled, it |
| 142 // fetches the |hqp_experimental_scoring_enabled_|, |topicality_threshold_| | 152 // fetches the |hqp_experimental_scoring_enabled_|, |topicality_threshold_| |
| 143 // and |hqp_relevance_buckets_| from omnibox field trials. | 153 // and |hqp_relevance_buckets_| from omnibox field trials. |
| 144 static void InitHQPExperimentalParams(); | 154 static void InitHQPExperimentalParams(); |
| 145 | 155 |
| 146 // Helper function to parse the string containing the scoring buckets. | 156 // Helper function to parse the string containing the scoring buckets. |
| 147 // For example, | 157 // For example, |
| (...skipping 16 matching lines...) Expand all Loading... | |
| 164 static float typed_value_; | 174 static float typed_value_; |
| 165 | 175 |
| 166 // True if we should fix a bug in frequency scoring relating to how we | 176 // True if we should fix a bug in frequency scoring relating to how we |
| 167 // extrapolate frecency when the URL has been visited few times. | 177 // extrapolate frecency when the URL has been visited few times. |
| 168 static bool fix_few_visits_bug_; | 178 static bool fix_few_visits_bug_; |
| 169 | 179 |
| 170 // Determines whether GetFrequency() returns a score based on the weighted | 180 // Determines whether GetFrequency() returns a score based on the weighted |
| 171 // sum of visit scores instead of the weighted average. | 181 // sum of visit scores instead of the weighted average. |
| 172 static bool frequency_uses_sum_; | 182 static bool frequency_uses_sum_; |
| 173 | 183 |
| 184 // A mapping from the number of matching pages to their associated document | |
| 185 // specificity scores. See omnibox_field_trial.h for more details. | |
| 186 static OmniboxFieldTrial::NumMatchesScores* | |
| 187 num_matches_to_document_specificity_score_; | |
| 188 | |
| 174 // The maximum number of recent visits to examine in GetFrequency(). | 189 // The maximum number of recent visits to examine in GetFrequency(). |
| 175 static size_t max_visits_to_score_; | 190 static size_t max_visits_to_score_; |
| 176 | 191 |
| 177 // If true, we allow input terms to match in the TLD (e.g., ".com"). | 192 // If true, we allow input terms to match in the TLD (e.g., ".com"). |
| 178 static bool allow_tld_matches_; | 193 static bool allow_tld_matches_; |
| 179 | 194 |
| 180 // If true, we allow input terms to match in the scheme (e.g., "http://"). | 195 // If true, we allow input terms to match in the scheme (e.g., "http://"). |
| 181 static bool allow_scheme_matches_; | 196 static bool allow_scheme_matches_; |
| 182 | 197 |
| 183 // The number of title words examined when computing topicality scores. | 198 // The number of title words examined when computing topicality scores. |
| (...skipping 13 matching lines...) Expand all Loading... | |
| 197 static char hqp_relevance_buckets_str_[]; | 212 static char hqp_relevance_buckets_str_[]; |
| 198 | 213 |
| 199 // |hqp_relevance_buckets_| gives mapping from (topicality*frequency) | 214 // |hqp_relevance_buckets_| gives mapping from (topicality*frequency) |
| 200 // to the final relevance scoring. Please see GetFinalRelevancyScore() | 215 // to the final relevance scoring. Please see GetFinalRelevancyScore() |
| 201 // for more details and scoring method. | 216 // for more details and scoring method. |
| 202 static std::vector<ScoreMaxRelevance>* hqp_relevance_buckets_; | 217 static std::vector<ScoreMaxRelevance>* hqp_relevance_buckets_; |
| 203 }; | 218 }; |
| 204 typedef std::vector<ScoredHistoryMatch> ScoredHistoryMatches; | 219 typedef std::vector<ScoredHistoryMatch> ScoredHistoryMatches; |
| 205 | 220 |
| 206 #endif // COMPONENTS_OMNIBOX_BROWSER_SCORED_HISTORY_MATCH_H_ | 221 #endif // COMPONENTS_OMNIBOX_BROWSER_SCORED_HISTORY_MATCH_H_ |
| OLD | NEW |