Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(66)

Side by Side Diff: chrome/browser/autocomplete/bookmark_provider.cc

Issue 10913262: Implement Bookmark Autocomplete Provider (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/
Patch Set: Now handles duplicate term matching. Comments, etc. Created 8 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
Property Changes:
Added: svn:eol-style
+ LF
OLDNEW
(Empty)
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/browser/autocomplete/bookmark_provider.h"
6
7 #include <algorithm>
8 #include <functional>
9 #include <vector>
10
11 #include "base/metrics/histogram.h"
12 #include "base/time.h"
13 #include "chrome/browser/autocomplete/autocomplete_result.h"
14 #include "chrome/browser/bookmarks/bookmark_model.h"
15 #include "chrome/browser/bookmarks/bookmark_model_factory.h"
16 #include "chrome/browser/prefs/pref_service.h"
17 #include "chrome/browser/profiles/profile.h"
18 #include "chrome/common/pref_names.h"
19 #include "net/base/net_util.h"
20
21 typedef std::vector<bookmark_utils::TitleMatch> TitleMatches;
22
23 // BookmarkProvider ------------------------------------------------------------
24
25 BookmarkProvider::BookmarkProvider(
26 AutocompleteProviderListener* listener,
27 Profile* profile)
28 : AutocompleteProvider(listener, profile,
29 AutocompleteProvider::TYPE_BOOKMARK),
30 bookmark_model_(NULL) {
31 if (profile) {
32 bookmark_model_ = BookmarkModelFactory::GetForProfile(profile);
33 languages_ = profile_->GetPrefs()->GetString(prefs::kAcceptLanguages);
34 }
35 }
36
37 void BookmarkProvider::Start(const AutocompleteInput& input,
38 bool minimal_changes) {
39 if (minimal_changes)
40 return;
41 matches_.clear();
42
43 if (input.text().empty() ||
44 ((input.type() != AutocompleteInput::UNKNOWN) &&
45 (input.type() != AutocompleteInput::REQUESTED_URL) &&
46 (input.type() != AutocompleteInput::QUERY)) ||
47 (input.matches_requested() == AutocompleteInput::BEST_MATCH &&
Peter Kasting 2012/10/10 18:29:06 Nit: Be consistent about whether boolean subexpres
mrossetti 2012/10/15 19:22:46 Done.
48 input.prevent_inline_autocomplete()))
Peter Kasting 2012/10/10 18:29:06 Nit: Might want a comment about why we're bailing
mrossetti 2012/10/15 19:22:46 Done.
49 return;
50
51 base::TimeTicks start_time = base::TimeTicks::Now();
52 DoAutocomplete(input,
53 input.matches_requested() == AutocompleteInput::BEST_MATCH);
54 UMA_HISTOGRAM_TIMES("Autocomplete.BookmarkProviderMatchTime",
55 base::TimeTicks::Now() - start_time);
56 }
57
58 BookmarkProvider::~BookmarkProvider() {}
59
60 void BookmarkProvider::DoAutocomplete(const AutocompleteInput& input,
61 bool best_match) {
62 // We may not have a bookmark model for some unit tests.
63 if (!bookmark_model_)
64 return;
65
66 TitleMatches matches;
67 // Retrieve enough bookmarks so that we have a reasonable probability of
68 // retrieving the one that the user desires.
69 const size_t kMaxBookmarkMatches = 50;
70
71 // GetBookmarksWithTitlesMatching returns bookmarks matching the user's
72 // search terms using the following rules:
73 // - The search text is broken up into search terms. Each term is searched
74 // for separately.
75 // - Term matches are always performed against the start of a word. 'def'
76 // will match against 'define' but not against 'indefinite'.
77 // - Terms must be at least three characters in length in order to perform
78 // partial word matches. Any term of lesser length will only be used as an
79 // exact match. 'def' will match against 'define' but 'de' will not match.
80 // - A search containing multiple terms will return results with those words
81 // occuring in any order.
82 // - Terms enclosed in quotes comprises a phrase that must match exactly.
83 // - Multiple terms enclosed in quotes will require those exact words in that
84 // exact order to match.
85 //
86 // Note that the AutocompleteMatch::ACMatchClassification for each match
Mark P 2012/10/10 22:28:57 I don't see how this paragraph is relevant here.
mrossetti 2012/10/15 19:22:46 Reworded.
87 // can never result in a match span greater than the length of the title
88 // against which it is being matched nor can those spans ever overlap because
89 // GetBookmarksWithTitlesMatching compresses and combines the spans for all
90 // terms.
91 //
92 // Please refer to the code for BookmarkIndex::GetBookmarksWithTitlesMatching
93 // for complete details of how title searches are performed against the user's
94 // bookmarks.
95 bookmark_model_->GetBookmarksWithTitlesMatching(input.text(),
96 kMaxBookmarkMatches,
97 &matches);
98 if (matches.empty())
99 return; // There were no matches.
100 for (TitleMatches::const_iterator i = matches.begin(); i != matches.end();
101 ++i) {
102 AutocompleteMatch match(TitleMatchToACMatch(*i));
103 if (match.relevance > 0)
104 matches_.push_back(match);
105 }
106
107 // Sort and clip the resulting matches.
108 size_t max_matches = best_match ? 1 : AutocompleteProvider::kMaxMatches;
109 if (matches_.size() > max_matches) {
110 std::partial_sort(matches_.begin(), matches_.end(),
111 matches_.begin() + max_matches,
112 AutocompleteMatch::MoreRelevant);
113 matches_.resize(max_matches);
114 } else {
115 std::sort(matches_.begin(), matches_.end(),
116 AutocompleteMatch::MoreRelevant);
117 }
118 }
119
120 namespace {
121
122 // for_each helper functor that calculates a match factor for each query term
123 // when calculating the final score.
124 //
125 // Calculate a 'factor' from 0.0 to 1.0 based on 1) how much of the bookmark's
126 // title the term matches, and 2) where the match is positioned within the
127 // bookmark's title. A full length match earns a 1.0. A half-length match earns
128 // at most a 0.5 and at least a 0.25. A single character match against a title
129 // that is 100 characters long where the match is at the first character will
130 // earn a 0.01 and at the last character will earn a 0.0001.
131 class ScoringFunctor {
132 public:
133 // |title_length| is the length of the bookmark title against which this
134 // match will be scored.
135 explicit ScoringFunctor(size_t title_length)
136 : title_length_(static_cast<double>(title_length)),
137 scoring_factor_(0.0) {
138 }
139
140 void operator()(const Snippet::MatchPosition& match) {
141 double term_length = static_cast<double>(match.second - match.first);
142 scoring_factor_ += term_length / title_length_ *
143 (title_length_ - match.first) / title_length_;
Peter Kasting 2012/10/10 18:29:06 Nit: Indent 4 (only function args get indented eve
mrossetti 2012/10/15 19:22:46 Done.
144 }
145
146 double ScoringFactor() { return scoring_factor_; }
147
148 private:
149 double title_length_;
150 double scoring_factor_;
151 };
152
153 } // namespace
154
155 AutocompleteMatch BookmarkProvider::TitleMatchToACMatch(
156 const bookmark_utils::TitleMatch& title_match) {
157 // Compose a match that has the URL of the bookmark and the bookmark's title,
158 // not the URL's page title, as the description. Note that if the relevance
159 // is never changed from 0 that the match will be discarded. false is passed
Peter Kasting 2012/10/10 18:29:06 Nit: Kill passive voice: "The AutocompleteMatch we
mrossetti 2012/10/15 19:22:46 Done.
160 // for the |deletable| parameter in the following constructor to indicate
161 // that the user cannot delete the autocomplete suggestion and, thus, the
162 // the bookmark.
163 AutocompleteMatch match(this, 0, false, AutocompleteMatch::BOOKMARK_TITLE);
164 const string16& title(title_match.node->GetTitle());
165 DCHECK(!title.empty());
166 const GURL& url(title_match.node->url());
167 match.destination_url = url;
168 match.contents = net::FormatUrl(url, languages_,
169 net::kFormatUrlOmitAll & net::kFormatUrlOmitHTTP,
170 net::UnescapeRule::SPACES, NULL, NULL, NULL);
171 match.contents_class.push_back(
172 ACMatchClassification(0, ACMatchClassification::NONE));
173 match.fill_into_edit =
174 AutocompleteInput::FormattedStringWithEquivalentMeaning(url,
175 match.contents);
176 match.description = title;
177 match.description_class =
178 ClassificationsFromMatch(title_match.match_positions,
179 match.description.size());
180 match.starred = true;
181
182 // Summary on how a relevance score is determined for the match:
183 //
184 // For each term matching within the bookmark (as given by the set of
185 // ACMatchClassifications) calculate a 'factor', sum up those factors, then
Mark P 2012/10/10 22:28:57 I'm confused. I thought these matches were given
mrossetti 2012/10/15 19:22:46 Correct. I've edited and re-edited these comments
186 // use the sum to figure out a value between the base score and the maximum
187 // score.
188 //
189 // The relevance calculation is based on:
Mark P 2012/10/10 22:28:57 Please divide this into section things: things tha
mrossetti 2012/10/15 19:22:46 Done.
190 // 1) how much of the bookmark's title has been matched by the input text:
191 // (term length / title length),
Mark P 2012/10/10 22:28:57 term length -> total match length
mrossetti 2012/10/15 19:22:46 "term length". I've changed a couple of references
192 // 2) where the matches occurs within the bookmark's title, giving more
193 // points for matches that appear earlier in the title:
194 // ((title length - position of match start) / title_length), and
195 // 3) how many times the bookmark's URL is referenced by other bookmarks
196 // where each additional URL reference up to three boosts the score by
197 // a fixed amount.
198 //
199 // Examples of #1 and #2 above:
200 // 1) Given a bookmark title of 'abcde fghijklm' and two different search
201 // terms, 'abcde' and 'fghijklm', 'fghijklm' will score higher (with a
202 // partial factor of 8/14 = 0.571) than 'abcde' (5/14 = 0.357).
Mark P 2012/10/10 22:28:57 Please revise the text to make it clearer that thi
mrossetti 2012/10/15 19:22:46 I moved the examples to be adjacent to the points
203 // 2) Given a bookmark title of 'abcde fghij' and two different search
204 // terms, 'abcde' and 'fghij', 'abcde' will score higher (with a
205 // match start position of 0 will give a partial factor of
206 // (14-0)/14 = 1.000) than 'fghij' (with a match start position of 6
207 // (14-6)/14 = 0.571).
208 //
209 // See ScoringFunctor, above, for the details on how the factor is calculated.
Mark P 2012/10/10 22:28:57 Only the first two clauses above go into the facto
mrossetti 2012/10/15 19:22:46 Done.
210 ScoringFunctor position_functor =
211 for_each(title_match.match_positions.begin(),
212 title_match.match_positions.end(), ScoringFunctor(title.size()));
Mark P 2012/10/10 22:28:57 Actually, let me change my request. Suppose the b
mrossetti 2012/10/15 19:22:46 There will be two match positions representing bot
Mark P 2012/10/18 18:37:54 Thanks for the explanation.
213 const int kBaseBookmarkScore = 900;
214 const int kMaxBookmarkScore = AutocompleteResult::kLowestDefaultScore - 1;
215 const double kBookmarkScoreRange =
216 static_cast<double>(kMaxBookmarkScore - kBaseBookmarkScore);
217 // It's not likely that GetBookmarksWithTitlesMatching will return overlapping
218 // matches but let's play it safe.
219 match.relevance = std::min(kMaxBookmarkScore,
220 static_cast<int>(position_functor.ScoringFactor() * kBookmarkScoreRange) +
221 kBaseBookmarkScore);
222 // Don't waste any time searching for additional referenced URLs if we
223 // already have a perfect title match.
224 if (match.relevance >= kMaxBookmarkScore)
225 return match;
226 // Boost the score if the bookmark's URL is referenced by other bookmarks.
227 const int kURLCountBoost[4] = { 0, 75, 125, 150 };
228 std::vector<const BookmarkNode*> nodes;
229 bookmark_model_->GetNodesByURL(url, &nodes);
230 DCHECK_GE(std::min(arraysize(kURLCountBoost), nodes.size()), 1U);
231 match.relevance +=
232 kURLCountBoost[std::min(arraysize(kURLCountBoost), nodes.size()) - 1];
233 match.relevance = std::min(kMaxBookmarkScore, match.relevance);
234 return match;
235 }
236
237 // static
238 ACMatchClassifications BookmarkProvider::ClassificationsFromMatch(
239 const Snippet::MatchPositions& positions,
240 size_t text_length) {
241 ACMatchClassifications classifications;
242 if (positions.empty()) {
243 classifications.push_back(
244 ACMatchClassification(0, ACMatchClassification::NONE));
245 return classifications;
246 }
247
248 for (Snippet::MatchPositions::const_iterator i = positions.begin();
249 i != positions.end(); ++i) {
250 AutocompleteMatch::ACMatchClassifications new_class;
251 AutocompleteMatch::ClassifyLocationInString(i->first, i->second - i->first,
252 text_length, 0, &new_class);
253 classifications = AutocompleteMatch::MergeClassifications(
254 classifications, new_class);
255 }
256 return classifications;
257 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698