|
OLD | NEW |
---|---|
(Empty) | |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "chrome/browser/autocomplete/bookmark_provider.h" | |
6 | |
7 #include <algorithm> | |
8 #include <functional> | |
9 #include <vector> | |
10 | |
11 #include "base/metrics/histogram.h" | |
12 #include "base/time.h" | |
13 #include "chrome/browser/autocomplete/autocomplete_result.h" | |
14 #include "chrome/browser/bookmarks/bookmark_model.h" | |
15 #include "chrome/browser/bookmarks/bookmark_model_factory.h" | |
16 #include "chrome/browser/prefs/pref_service.h" | |
17 #include "chrome/browser/profiles/profile.h" | |
18 #include "chrome/common/pref_names.h" | |
19 #include "net/base/net_util.h" | |
20 | |
21 typedef std::vector<bookmark_utils::TitleMatch> TitleMatches; | |
22 | |
23 // BookmarkProvider ------------------------------------------------------------ | |
24 | |
25 BookmarkProvider::BookmarkProvider( | |
26 AutocompleteProviderListener* listener, | |
27 Profile* profile) | |
28 : AutocompleteProvider(listener, profile, | |
29 AutocompleteProvider::TYPE_BOOKMARK), | |
30 bookmark_model_(NULL) { | |
31 if (profile) { | |
32 bookmark_model_ = BookmarkModelFactory::GetForProfile(profile); | |
33 languages_ = profile_->GetPrefs()->GetString(prefs::kAcceptLanguages); | |
34 } | |
35 } | |
36 | |
37 void BookmarkProvider::Start(const AutocompleteInput& input, | |
38 bool minimal_changes) { | |
39 if (minimal_changes) | |
40 return; | |
41 matches_.clear(); | |
42 | |
43 // Short-circuit any matching when inline autocompletion is disabled and | |
44 // we're looking for BEST_MATCH because none of the BookmarkProvider's | |
45 // matches can score high enough to qualify. | |
46 if (input.text().empty() || | |
47 ((input.type() != AutocompleteInput::UNKNOWN) && | |
48 (input.type() != AutocompleteInput::REQUESTED_URL) && | |
49 (input.type() != AutocompleteInput::QUERY)) || | |
50 ((input.matches_requested() == AutocompleteInput::BEST_MATCH) && | |
51 input.prevent_inline_autocomplete())) | |
52 return; | |
53 | |
54 base::TimeTicks start_time = base::TimeTicks::Now(); | |
55 DoAutocomplete(input, | |
56 input.matches_requested() == AutocompleteInput::BEST_MATCH); | |
57 UMA_HISTOGRAM_TIMES("Autocomplete.BookmarkProviderMatchTime", | |
58 base::TimeTicks::Now() - start_time); | |
59 } | |
60 | |
61 BookmarkProvider::~BookmarkProvider() {} | |
62 | |
63 void BookmarkProvider::DoAutocomplete(const AutocompleteInput& input, | |
64 bool best_match) { | |
65 // We may not have a bookmark model for some unit tests. | |
66 if (!bookmark_model_) | |
67 return; | |
68 | |
69 TitleMatches matches; | |
70 // Retrieve enough bookmarks so that we have a reasonable probability of | |
71 // suggesting the one that the user desires. | |
72 const size_t kMaxBookmarkMatches = 50; | |
73 | |
74 // GetBookmarksWithTitlesMatching returns bookmarks matching the user's | |
75 // search terms using the following rules: | |
76 // - The search text is broken up into search terms. Each term is searched | |
77 // for separately. | |
78 // - Term matches are always performed against the start of a word. 'def' | |
79 // will match against 'define' but not against 'indefinite'. | |
80 // - Terms must be at least three characters in length in order to perform | |
81 // partial word matches. Any term of lesser length will only be used as an | |
82 // exact match. 'def' will match against 'define' but 'de' will not match. | |
83 // - A search containing multiple terms will return results with those words | |
84 // occuring in any order. | |
85 // - Terms enclosed in quotes comprises a phrase that must match exactly. | |
86 // - Multiple terms enclosed in quotes will require those exact words in that | |
87 // exact order to match. | |
88 // | |
89 // Note: GetBookmarksWithTitlesMatching() will never return a match span | |
90 // greater than the length of the title against which it is being matched, | |
91 // nor can those spans ever overlap because the match spans are coalesced | |
92 // for all matched terms. | |
93 // | |
94 // Please refer to the code for BookmarkIndex::GetBookmarksWithTitlesMatching | |
95 // for complete details of how title searches are performed against the user's | |
96 // bookmarks. | |
97 bookmark_model_->GetBookmarksWithTitlesMatching(input.text(), | |
98 kMaxBookmarkMatches, | |
99 &matches); | |
100 if (matches.empty()) | |
101 return; // There were no matches. | |
102 for (TitleMatches::const_iterator i = matches.begin(); i != matches.end(); | |
103 ++i) { | |
104 AutocompleteMatch match(TitleMatchToACMatch(*i)); | |
105 if (match.relevance > 0) | |
106 matches_.push_back(match); | |
107 } | |
108 | |
109 // Sort and clip the resulting matches. | |
110 size_t max_matches = best_match ? 1 : AutocompleteProvider::kMaxMatches; | |
111 if (matches_.size() > max_matches) { | |
112 std::partial_sort(matches_.begin(), matches_.end(), | |
113 matches_.begin() + max_matches, | |
114 AutocompleteMatch::MoreRelevant); | |
115 matches_.resize(max_matches); | |
116 } else { | |
117 std::sort(matches_.begin(), matches_.end(), | |
118 AutocompleteMatch::MoreRelevant); | |
119 } | |
120 } | |
121 | |
122 namespace { | |
123 | |
124 // for_each helper functor that calculates a match factor for each query term | |
125 // when calculating the final score. | |
126 // | |
127 // Calculate a 'factor' from 0.0 to 1.0 based on 1) how much of the bookmark's | |
128 // title the term matches, and 2) where the match is positioned within the | |
129 // bookmark's title. A full length match earns a 1.0. A half-length match earns | |
130 // at most a 0.5 and at least a 0.25. A single character match against a title | |
131 // that is 100 characters long where the match is at the first character will | |
132 // earn a 0.01 and at the last character will earn a 0.0001. | |
133 class ScoringFunctor { | |
134 public: | |
135 // |title_length| is the length of the bookmark title against which this | |
136 // match will be scored. | |
137 explicit ScoringFunctor(size_t title_length) | |
138 : title_length_(static_cast<double>(title_length)), | |
139 scoring_factor_(0.0) { | |
140 } | |
141 | |
142 void operator()(const Snippet::MatchPosition& match) { | |
143 double term_length = static_cast<double>(match.second - match.first); | |
144 scoring_factor_ += term_length / title_length_ * | |
145 (title_length_ - match.first) / title_length_; | |
146 } | |
147 | |
148 double ScoringFactor() { return scoring_factor_; } | |
149 | |
150 private: | |
151 double title_length_; | |
152 double scoring_factor_; | |
153 }; | |
154 | |
155 } // namespace | |
156 | |
157 AutocompleteMatch BookmarkProvider::TitleMatchToACMatch( | |
Mark P
2012/10/18 18:37:54
I found this comment that you deleted useful: Note
mrossetti
2012/10/19 17:33:07
I moved part of the original comment you liked to
| |
158 const bookmark_utils::TitleMatch& title_match) { | |
159 // The AutocompleteMatch we construct is non-deletable because the only | |
160 // way to support this would be to delete the underlying bookmark, which is | |
161 // unlikely to be what the user intends. | |
162 AutocompleteMatch match(this, 0, false, AutocompleteMatch::BOOKMARK_TITLE); | |
163 const string16& title(title_match.node->GetTitle()); | |
164 DCHECK(!title.empty()); | |
165 const GURL& url(title_match.node->url()); | |
166 match.destination_url = url; | |
167 match.contents = net::FormatUrl(url, languages_, | |
168 net::kFormatUrlOmitAll & net::kFormatUrlOmitHTTP, | |
169 net::UnescapeRule::SPACES, NULL, NULL, NULL); | |
170 match.contents_class.push_back( | |
171 ACMatchClassification(0, ACMatchClassification::NONE)); | |
172 match.fill_into_edit = | |
173 AutocompleteInput::FormattedStringWithEquivalentMeaning(url, | |
174 match.contents); | |
175 match.description = title; | |
176 match.description_class = | |
177 ClassificationsFromMatch(title_match.match_positions, | |
178 match.description.size()); | |
179 match.starred = true; | |
180 | |
181 // Summary on how a relevance score is determined for the match: | |
182 // | |
183 // For each term matching within the bookmark's title (as given by the set of | |
184 // Snippet::MatchPositions) calculate a 'factor', sum up those factors, then | |
185 // use the sum to figure out a value between the base score and the maximum | |
186 // score. | |
187 // | |
188 // The factor for each term is calculated based on: | |
189 // | |
190 // 1) how much of the bookmark's title has been matched by the term: | |
191 // (term length / title length). | |
192 // | |
193 // Example: Given a bookmark title 'abcde fghijklm', with a title length | |
194 // of 14, and two different search terms, 'abcde' and 'fghijklm', with | |
195 // term lengths of 5 and 8, respectively, 'fghijklm' will score higher | |
196 // (with a partial factor of 8/14 = 0.571) than 'abcde' (5/14 = 0.357). | |
197 // | |
198 // 2) where the term match occurs within the bookmark's title, giving more | |
199 // points for matches that appear earlier in the title: | |
200 // ((title length - position of match start) / title_length). | |
201 // | |
202 // Example: Given a bookmark title of 'abcde fghijklm', with a title length | |
203 // of 14, and two different search terms, 'abcde' and 'fghij', with | |
204 // start positions of 0 and 6, respectively, 'abcde' will score higher | |
205 // (with a a partial factor of (14-0)/14 = 1.000 ) than 'fghij' (with | |
206 // a partial factor of (14-6)/14 = 0.571 ). | |
207 // | |
208 // Once all term factors have been calculated they are summed. The resulting | |
Mark P
2012/10/18 18:37:54
This first sentence isn't true. You multiple the
mrossetti
2012/10/19 17:33:07
Please take a look at ScoringFunctor::operator() a
Mark P
2012/10/19 17:50:44
Ah, I see why I thought your comment was misleadin
| |
209 // sum will never be greater than 1.0. This sum is then multiplied against | |
Mark P
2012/10/18 18:37:54
Why will it never be greater than 1.0?
I think it
mrossetti
2012/10/19 17:33:07
The math is pure. It can never be greater than 1.0
Mark P
2012/10/19 17:50:44
Consider the title "food" and omnibox input "foo o
| |
210 // the scoring range available, which is 299. The 299 is calculated by | |
211 // subtracting the minimum possible score, 900, from the maximum possible | |
212 // score, 1199. This product, ranging from 0 to 299, is added to the minimum | |
213 // possible score, 900, giving the preliminary score. | |
214 // | |
215 // If the preliminary score is less than the maximum possible score, 1199, | |
216 // it can be boosted if the URL referenced by the bookmark is also referenced | |
Mark P
2012/10/18 18:37:54
boosted if
->
boosted up to the maximum possible s
mrossetti
2012/10/19 17:33:07
Done.
| |
217 // by any of the user's other bookmarks. A count of how many times the | |
218 // bookmark's URL is referenced is determined and, for each additional | |
219 // reference beyond the one for the bookmark being scored up to a maximum | |
220 // of three, the score is boosted by a fixed amount given by |kURLCountBoost|, | |
221 // below. As it is possible for this boost to cause the score to exceed the | |
222 // maximum possible score, the score is capped to that maximum possible. | |
223 // | |
224 ScoringFunctor position_functor = | |
225 for_each(title_match.match_positions.begin(), | |
226 title_match.match_positions.end(), ScoringFunctor(title.size())); | |
227 const int kBaseBookmarkScore = 900; | |
228 const int kMaxBookmarkScore = AutocompleteResult::kLowestDefaultScore - 1; | |
229 const double kBookmarkScoreRange = | |
230 static_cast<double>(kMaxBookmarkScore - kBaseBookmarkScore); | |
231 // It's not likely that GetBookmarksWithTitlesMatching will return overlapping | |
232 // matches but let's play it safe. | |
233 match.relevance = std::min(kMaxBookmarkScore, | |
234 static_cast<int>(position_functor.ScoringFactor() * kBookmarkScoreRange) + | |
235 kBaseBookmarkScore); | |
236 // Don't waste any time searching for additional referenced URLs if we | |
237 // already have a perfect title match. | |
238 if (match.relevance >= kMaxBookmarkScore) | |
239 return match; | |
240 // Boost the score if the bookmark's URL is referenced by other bookmarks. | |
241 const int kURLCountBoost[4] = { 0, 75, 125, 150 }; | |
242 std::vector<const BookmarkNode*> nodes; | |
243 bookmark_model_->GetNodesByURL(url, &nodes); | |
244 DCHECK_GE(std::min(arraysize(kURLCountBoost), nodes.size()), 1U); | |
245 match.relevance += | |
246 kURLCountBoost[std::min(arraysize(kURLCountBoost), nodes.size()) - 1]; | |
247 match.relevance = std::min(kMaxBookmarkScore, match.relevance); | |
248 return match; | |
249 } | |
250 | |
251 // static | |
252 ACMatchClassifications BookmarkProvider::ClassificationsFromMatch( | |
253 const Snippet::MatchPositions& positions, | |
254 size_t text_length) { | |
255 ACMatchClassifications classifications; | |
256 if (positions.empty()) { | |
257 classifications.push_back( | |
258 ACMatchClassification(0, ACMatchClassification::NONE)); | |
259 return classifications; | |
260 } | |
261 | |
262 for (Snippet::MatchPositions::const_iterator i = positions.begin(); | |
263 i != positions.end(); ++i) { | |
264 AutocompleteMatch::ACMatchClassifications new_class; | |
265 AutocompleteMatch::ClassifyLocationInString(i->first, i->second - i->first, | |
266 text_length, 0, &new_class); | |
267 classifications = AutocompleteMatch::MergeClassifications( | |
268 classifications, new_class); | |
269 } | |
270 return classifications; | |
271 } | |
OLD | NEW |