Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(170)

Side by Side Diff: components/omnibox/browser/scored_history_match.h

Issue 2541143002: Omnibox - Boost Frequency Scores Based on Number of Matching Pages (Closed)
Patch Set: fix rebase errors Created 4 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #ifndef COMPONENTS_OMNIBOX_BROWSER_SCORED_HISTORY_MATCH_H_ 5 #ifndef COMPONENTS_OMNIBOX_BROWSER_SCORED_HISTORY_MATCH_H_
6 #define COMPONENTS_OMNIBOX_BROWSER_SCORED_HISTORY_MATCH_H_ 6 #define COMPONENTS_OMNIBOX_BROWSER_SCORED_HISTORY_MATCH_H_
7 7
8 #include <stddef.h> 8 #include <stddef.h>
9 9
10 #include <string> 10 #include <string>
11 #include <vector> 11 #include <vector>
12 12
13 #include "base/gtest_prod_util.h" 13 #include "base/gtest_prod_util.h"
14 #include "base/strings/string16.h" 14 #include "base/strings/string16.h"
15 #include "base/time/time.h" 15 #include "base/time/time.h"
16 #include "components/history/core/browser/history_match.h" 16 #include "components/history/core/browser/history_match.h"
17 #include "components/history/core/browser/history_types.h" 17 #include "components/history/core/browser/history_types.h"
18 #include "components/omnibox/browser/in_memory_url_index_types.h" 18 #include "components/omnibox/browser/in_memory_url_index_types.h"
19 #include "components/omnibox/browser/omnibox_field_trial.h"
19 20
20 class ScoredHistoryMatchTest; 21 class ScoredHistoryMatchTest;
21 22
22 // A HistoryMatch that has a score as well as metrics defining where in the 23 // A HistoryMatch that has a score as well as metrics defining where in the
23 // history item's URL and/or page title matches have occurred. 24 // history item's URL and/or page title matches have occurred.
24 struct ScoredHistoryMatch : public history::HistoryMatch { 25 struct ScoredHistoryMatch : public history::HistoryMatch {
25 // ScoreMaxRelevance maps from an intermediate-score to the maximum 26 // ScoreMaxRelevance maps from an intermediate-score to the maximum
26 // final-relevance score given to a URL for this intermediate score. 27 // final-relevance score given to a URL for this intermediate score.
27 // This is used to store the score ranges of relevance buckets. 28 // This is used to store the score ranges of relevance buckets.
28 // Please see GetFinalRelevancyScore() for details. 29 // Please see GetFinalRelevancyScore() for details.
(...skipping 12 matching lines...) Expand all
41 // history item given in |row| with recent visits as indicated in |visits|. It 42 // history item given in |row| with recent visits as indicated in |visits|. It
42 // first determines if the row qualifies by seeing if all of the terms in 43 // first determines if the row qualifies by seeing if all of the terms in
43 // |terms_vector| occur in |row|. If so, calculates a raw score. This raw 44 // |terms_vector| occur in |row|. If so, calculates a raw score. This raw
44 // score is in part determined by whether the matches occur at word 45 // score is in part determined by whether the matches occur at word
45 // boundaries, the locations of which are stored in |word_starts|. For some 46 // boundaries, the locations of which are stored in |word_starts|. For some
46 // terms, it's appropriate to look for the word boundary within the term. For 47 // terms, it's appropriate to look for the word boundary within the term. For
47 // instance, the term ".net" should look for a word boundary at the "n". 48 // instance, the term ".net" should look for a word boundary at the "n".
48 // These offsets (".net" should have an offset of 1) come from 49 // These offsets (".net" should have an offset of 1) come from
49 // |terms_to_word_starts_offsets|. |is_url_bookmarked| indicates whether the 50 // |terms_to_word_starts_offsets|. |is_url_bookmarked| indicates whether the
50 // match's URL is referenced by any bookmarks, which can also affect the raw 51 // match's URL is referenced by any bookmarks, which can also affect the raw
51 // score. The raw score allows the matches to be ordered and can be used to 52 // score. |num_matching_pages| indicates how many URLs in the eligible URL
52 // influence the final score calculated by the client of this index. If the 53 // database match the user's input; it can also affect the raw score. The raw
53 // row does not qualify the raw score will be 0. 54 // score allows the matches to be ordered and can be used to influence the
55 // final score calculated by the client of this index. If the row does not
56 // qualify the raw score will be 0.
54 ScoredHistoryMatch(const history::URLRow& row, 57 ScoredHistoryMatch(const history::URLRow& row,
55 const VisitInfoVector& visits, 58 const VisitInfoVector& visits,
56 const base::string16& lower_string, 59 const base::string16& lower_string,
57 const String16Vector& terms_vector, 60 const String16Vector& terms_vector,
58 const WordStarts& terms_to_word_starts_offsets, 61 const WordStarts& terms_to_word_starts_offsets,
59 const RowWordStarts& word_starts, 62 const RowWordStarts& word_starts,
60 bool is_url_bookmarked, 63 bool is_url_bookmarked,
64 size_t num_matching_pages,
61 base::Time now); 65 base::Time now);
62 66
63 ~ScoredHistoryMatch(); 67 ~ScoredHistoryMatch();
64 68
65 // Compares two matches by score. Functor supporting URLIndexPrivateData's 69 // Compares two matches by score. Functor supporting URLIndexPrivateData's
66 // HistoryItemsForTerms function. Looks at particular fields within 70 // HistoryItemsForTerms function. Looks at particular fields within
67 // url_info to make tie-breaking a bit smarter. 71 // url_info to make tie-breaking a bit smarter.
68 static bool MatchScoreGreater(const ScoredHistoryMatch& m1, 72 static bool MatchScoreGreater(const ScoredHistoryMatch& m1,
69 const ScoredHistoryMatch& m2); 73 const ScoredHistoryMatch& m2);
70 74
(...skipping 20 matching lines...) Expand all
91 // unimportant matches are eliminated by GetTopicalityScore(), called 95 // unimportant matches are eliminated by GetTopicalityScore(), called
92 // during construction.) 96 // during construction.)
93 97
94 // Term matches within the URL. 98 // Term matches within the URL.
95 TermMatches url_matches; 99 TermMatches url_matches;
96 // Term matches within the page title. 100 // Term matches within the page title.
97 TermMatches title_matches; 101 TermMatches title_matches;
98 102
99 private: 103 private:
100 friend class ScoredHistoryMatchTest; 104 friend class ScoredHistoryMatchTest;
105 FRIEND_TEST_ALL_PREFIXES(ScoredHistoryMatchTest, GetDocumentSpecificityScore);
101 FRIEND_TEST_ALL_PREFIXES(ScoredHistoryMatchTest, GetFinalRelevancyScore); 106 FRIEND_TEST_ALL_PREFIXES(ScoredHistoryMatchTest, GetFinalRelevancyScore);
102 FRIEND_TEST_ALL_PREFIXES(ScoredHistoryMatchTest, GetFrequency); 107 FRIEND_TEST_ALL_PREFIXES(ScoredHistoryMatchTest, GetFrequency);
103 FRIEND_TEST_ALL_PREFIXES(ScoredHistoryMatchTest, GetHQPBucketsFromString); 108 FRIEND_TEST_ALL_PREFIXES(ScoredHistoryMatchTest, GetHQPBucketsFromString);
104 FRIEND_TEST_ALL_PREFIXES(ScoredHistoryMatchTest, ScoringBookmarks); 109 FRIEND_TEST_ALL_PREFIXES(ScoredHistoryMatchTest, ScoringBookmarks);
105 FRIEND_TEST_ALL_PREFIXES(ScoredHistoryMatchTest, ScoringScheme); 110 FRIEND_TEST_ALL_PREFIXES(ScoredHistoryMatchTest, ScoringScheme);
106 FRIEND_TEST_ALL_PREFIXES(ScoredHistoryMatchTest, ScoringTLD); 111 FRIEND_TEST_ALL_PREFIXES(ScoredHistoryMatchTest, ScoringTLD);
107 112
108 // Initialize ScoredHistoryMatch statics. Must be called before any other 113 // Initialize ScoredHistoryMatch statics. Must be called before any other
109 // method of ScoredHistoryMatch and before creating any instances. 114 // method of ScoredHistoryMatch and before creating any instances.
110 static void Init(); 115 static void Init();
(...skipping 15 matching lines...) Expand all
126 131
127 // Examines the first |max_visits_to_score_| and returns a score (higher is 132 // Examines the first |max_visits_to_score_| and returns a score (higher is
128 // better) based on the rate of visits, whether the page is bookmarked, and 133 // better) based on the rate of visits, whether the page is bookmarked, and
129 // how often those visits are typed navigations (i.e., explicitly 134 // how often those visits are typed navigations (i.e., explicitly
130 // invoked by the user). |now| is passed in to avoid unnecessarily 135 // invoked by the user). |now| is passed in to avoid unnecessarily
131 // recomputing it frequently. 136 // recomputing it frequently.
132 float GetFrequency(const base::Time& now, 137 float GetFrequency(const base::Time& now,
133 const bool bookmarked, 138 const bool bookmarked,
134 const VisitInfoVector& visits) const; 139 const VisitInfoVector& visits) const;
135 140
136 // Combines the two component scores into a final score that's an appropriate 141 // Returns a document specificity score based on how many pages matched the
137 // value to use as a relevancy score. 142 // user's input.
143 float GetDocumentSpecificityScore(size_t num_matching_pages) const;
144
145 // Combines the three component scores into a final score that's
146 // an appropriate value to use as a relevancy score.
138 static float GetFinalRelevancyScore(float topicality_score, 147 static float GetFinalRelevancyScore(float topicality_score,
139 float frequency_score); 148 float frequency_score,
149 float specificity_score);
140 150
141 // Helper function that returns the string containing the scoring buckets 151 // Helper function that returns the string containing the scoring buckets
142 // (either the default ones or ones specified in an experiment). 152 // (either the default ones or ones specified in an experiment).
143 static ScoreMaxRelevances GetHQPBuckets(); 153 static ScoreMaxRelevances GetHQPBuckets();
144 154
145 // Helper function to parse the string containing the scoring buckets and 155 // Helper function to parse the string containing the scoring buckets and
146 // return the results. For example, with |buckets_str| as 156 // return the results. For example, with |buckets_str| as
147 // "0.0:400,1.5:600,12.0:1300,20.0:1399", it returns [(0.0, 400), (1.5, 600), 157 // "0.0:400,1.5:600,12.0:1300,20.0:1399", it returns [(0.0, 400), (1.5, 600),
148 // (12.0, 1300), (20.0, 1399)]. It returns an empty vector in the case of a 158 // (12.0, 1300), (20.0, 1399)]. It returns an empty vector in the case of a
149 // malformed |buckets_str|. 159 // malformed |buckets_str|.
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
184 194
185 // |topicality_threshold_| is used to control the topicality scoring. 195 // |topicality_threshold_| is used to control the topicality scoring.
186 // If |topicality_threshold_| > 0, then URLs with topicality-score less than 196 // If |topicality_threshold_| > 0, then URLs with topicality-score less than
187 // the threshold are given topicality score of 0. 197 // the threshold are given topicality score of 0.
188 static float topicality_threshold_; 198 static float topicality_threshold_;
189 199
190 // Used for testing. A possibly null pointer to a vector. If set, 200 // Used for testing. A possibly null pointer to a vector. If set,
191 // overrides the static local variable |relevance_buckets| declared in 201 // overrides the static local variable |relevance_buckets| declared in
192 // GetFinalRelevancyScore(). 202 // GetFinalRelevancyScore().
193 static ScoreMaxRelevances* relevance_buckets_override_; 203 static ScoreMaxRelevances* relevance_buckets_override_;
204
205 // Used for testing. If this pointer is not null, it overrides the static
206 // local variable |default_matches_to_specificity| declared in
207 // GetDocumentSpecificityScore().
208 static OmniboxFieldTrial::NumMatchesScores* matches_to_specificity_override_;
194 }; 209 };
195 typedef std::vector<ScoredHistoryMatch> ScoredHistoryMatches; 210 typedef std::vector<ScoredHistoryMatch> ScoredHistoryMatches;
196 211
197 #endif // COMPONENTS_OMNIBOX_BROWSER_SCORED_HISTORY_MATCH_H_ 212 #endif // COMPONENTS_OMNIBOX_BROWSER_SCORED_HISTORY_MATCH_H_
OLDNEW
« no previous file with comments | « components/omnibox/browser/omnibox_field_trial.cc ('k') | components/omnibox/browser/scored_history_match.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698