Chromium Code Reviews
|
| OLD | NEW |
|---|---|
| (Empty) | |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "chrome/browser/autocomplete/bookmark_provider.h" | |
| 6 | |
| 7 #include <algorithm> | |
| 8 #include <functional> | |
| 9 #include <vector> | |
| 10 | |
| 11 #include "base/metrics/histogram.h" | |
| 12 #include "base/time.h" | |
| 13 #include "chrome/browser/autocomplete/autocomplete_result.h" | |
| 14 #include "chrome/browser/bookmarks/bookmark_model.h" | |
| 15 #include "chrome/browser/bookmarks/bookmark_model_factory.h" | |
| 16 #include "chrome/browser/prefs/pref_service.h" | |
| 17 #include "chrome/browser/profiles/profile.h" | |
| 18 #include "chrome/common/pref_names.h" | |
| 19 #include "net/base/net_util.h" | |
| 20 | |
| 21 typedef std::vector<bookmark_utils::TitleMatch> TitleMatches; | |
| 22 | |
| 23 // BookmarkProvider ------------------------------------------------------------ | |
| 24 | |
| 25 BookmarkProvider::BookmarkProvider( | |
| 26 AutocompleteProviderListener* listener, | |
| 27 Profile* profile) | |
| 28 : AutocompleteProvider(listener, profile, | |
| 29 AutocompleteProvider::TYPE_BOOKMARK), | |
| 30 bookmark_model_(NULL) { | |
| 31 if (profile) { | |
| 32 bookmark_model_ = BookmarkModelFactory::GetForProfile(profile); | |
| 33 languages_ = profile_->GetPrefs()->GetString(prefs::kAcceptLanguages); | |
| 34 } | |
| 35 } | |
| 36 | |
| 37 void BookmarkProvider::Start(const AutocompleteInput& input, | |
| 38 bool minimal_changes) { | |
| 39 if (minimal_changes) | |
| 40 return; | |
| 41 matches_.clear(); | |
| 42 | |
| 43 if (input.text().empty() || | |
| 44 ((input.type() != AutocompleteInput::UNKNOWN) && | |
| 45 (input.type() != AutocompleteInput::REQUESTED_URL) && | |
| 46 (input.type() != AutocompleteInput::QUERY)) || | |
| 47 (input.matches_requested() == AutocompleteInput::BEST_MATCH && | |
|
Peter Kasting
2012/10/10 18:29:06
Nit: Be consistent about whether boolean subexpres
mrossetti
2012/10/15 19:22:46
Done.
| |
| 48 input.prevent_inline_autocomplete())) | |
|
Peter Kasting
2012/10/10 18:29:06
Nit: Might want a comment about why we're bailing
mrossetti
2012/10/15 19:22:46
Done.
| |
| 49 return; | |
| 50 | |
| 51 base::TimeTicks start_time = base::TimeTicks::Now(); | |
| 52 DoAutocomplete(input, | |
| 53 input.matches_requested() == AutocompleteInput::BEST_MATCH); | |
| 54 UMA_HISTOGRAM_TIMES("Autocomplete.BookmarkProviderMatchTime", | |
| 55 base::TimeTicks::Now() - start_time); | |
| 56 } | |
| 57 | |
| 58 BookmarkProvider::~BookmarkProvider() {} | |
| 59 | |
| 60 void BookmarkProvider::DoAutocomplete(const AutocompleteInput& input, | |
| 61 bool best_match) { | |
| 62 // We may not have a bookmark model for some unit tests. | |
| 63 if (!bookmark_model_) | |
| 64 return; | |
| 65 | |
| 66 TitleMatches matches; | |
| 67 // Retrieve enough bookmarks so that we have a reasonable probability of | |
| 68 // retrieving the one that the user desires. | |
| 69 const size_t kMaxBookmarkMatches = 50; | |
| 70 | |
| 71 // GetBookmarksWithTitlesMatching returns bookmarks matching the user's | |
| 72 // search terms using the following rules: | |
| 73 // - The search text is broken up into search terms. Each term is searched | |
| 74 // for separately. | |
| 75 // - Term matches are always performed against the start of a word. 'def' | |
| 76 // will match against 'define' but not against 'indefinite'. | |
| 77 // - Terms must be at least three characters in length in order to perform | |
| 78 // partial word matches. Any term of lesser length will only be used as an | |
| 79 // exact match. 'def' will match against 'define' but 'de' will not match. | |
| 80 // - A search containing multiple terms will return results with those words | |
| 81 // occurring in any order. | |
| 82 // - Terms enclosed in quotes comprise a phrase that must match exactly. | |
| 83 // - Multiple terms enclosed in quotes will require those exact words in that | |
| 84 // exact order to match. | |
| 85 // | |
| 86 // Note that the AutocompleteMatch::ACMatchClassification for each match | |
|
Mark P
2012/10/10 22:28:57
I don't see how this paragraph is relevant here.
mrossetti
2012/10/15 19:22:46
Reworded.
| |
| 87 // can never result in a match span greater than the length of the title | |
| 88 // against which it is being matched nor can those spans ever overlap because | |
| 89 // GetBookmarksWithTitlesMatching compresses and combines the spans for all | |
| 90 // terms. | |
| 91 // | |
| 92 // Please refer to the code for BookmarkIndex::GetBookmarksWithTitlesMatching | |
| 93 // for complete details of how title searches are performed against the user's | |
| 94 // bookmarks. | |
| 95 bookmark_model_->GetBookmarksWithTitlesMatching(input.text(), | |
| 96 kMaxBookmarkMatches, | |
| 97 &matches); | |
| 98 if (matches.empty()) | |
| 99 return; // There were no matches. | |
| 100 for (TitleMatches::const_iterator i = matches.begin(); i != matches.end(); | |
| 101 ++i) { | |
| 102 AutocompleteMatch match(TitleMatchToACMatch(*i)); | |
| 103 if (match.relevance > 0) | |
| 104 matches_.push_back(match); | |
| 105 } | |
| 106 | |
| 107 // Sort and clip the resulting matches. | |
| 108 size_t max_matches = best_match ? 1 : AutocompleteProvider::kMaxMatches; | |
| 109 if (matches_.size() > max_matches) { | |
| 110 std::partial_sort(matches_.begin(), matches_.end(), | |
| 111 matches_.begin() + max_matches, | |
| 112 AutocompleteMatch::MoreRelevant); | |
| 113 matches_.resize(max_matches); | |
| 114 } else { | |
| 115 std::sort(matches_.begin(), matches_.end(), | |
| 116 AutocompleteMatch::MoreRelevant); | |
| 117 } | |
| 118 } | |
| 119 | |
| 120 namespace { | |
| 121 | |
| 122 // for_each helper functor that calculates a match factor for each query term | |
| 123 // when calculating the final score. | |
| 124 // | |
| 125 // Calculate a 'factor' from 0.0 to 1.0 based on 1) how much of the bookmark's | |
| 126 // title the term matches, and 2) where the match is positioned within the | |
| 127 // bookmark's title. A full length match earns a 1.0. A half-length match earns | |
| 128 // at most a 0.5 and at least a 0.25. A single character match against a title | |
| 129 // that is 100 characters long where the match is at the first character will | |
| 130 // earn a 0.01 and at the last character will earn a 0.0001. | |
| 131 class ScoringFunctor { | |
| 132 public: | |
| 133 // |title_length| is the length of the bookmark title against which this | |
| 134 // match will be scored. | |
| 135 explicit ScoringFunctor(size_t title_length) | |
| 136 : title_length_(static_cast<double>(title_length)), | |
| 137 scoring_factor_(0.0) { | |
| 138 } | |
| 139 | |
| 140 void operator()(const Snippet::MatchPosition& match) { | |
| 141 double term_length = static_cast<double>(match.second - match.first); | |
| 142 scoring_factor_ += term_length / title_length_ * | |
| 143 (title_length_ - match.first) / title_length_; | |
|
Peter Kasting
2012/10/10 18:29:06
Nit: Indent 4 (only function args get indented eve
mrossetti
2012/10/15 19:22:46
Done.
| |
| 144 } | |
| 145 | |
| 146 double ScoringFactor() { return scoring_factor_; } | |
| 147 | |
| 148 private: | |
| 149 double title_length_; | |
| 150 double scoring_factor_; | |
| 151 }; | |
| 152 | |
| 153 } // namespace | |
| 154 | |
| 155 AutocompleteMatch BookmarkProvider::TitleMatchToACMatch( | |
| 156 const bookmark_utils::TitleMatch& title_match) { | |
| 157 // Compose a match that has the URL of the bookmark and the bookmark's title, | |
| 158 // not the URL's page title, as the description. Note that if the relevance | |
| 159 // is never changed from 0, the match will be discarded. false is passed
|
Peter Kasting
2012/10/10 18:29:06
Nit: Kill passive voice: "The AutocompleteMatch we
mrossetti
2012/10/15 19:22:46
Done.
| |
| 160 // for the |deletable| parameter in the following constructor to indicate | |
| 161 // that the user cannot delete the autocomplete suggestion and, thus, the | |
| 162 // bookmark. | |
| 163 AutocompleteMatch match(this, 0, false, AutocompleteMatch::BOOKMARK_TITLE); | |
| 164 const string16& title(title_match.node->GetTitle()); | |
| 165 DCHECK(!title.empty()); | |
| 166 const GURL& url(title_match.node->url()); | |
| 167 match.destination_url = url; | |
| 168 match.contents = net::FormatUrl(url, languages_, | |
| 169 net::kFormatUrlOmitAll & net::kFormatUrlOmitHTTP, | |
| 170 net::UnescapeRule::SPACES, NULL, NULL, NULL); | |
| 171 match.contents_class.push_back( | |
| 172 ACMatchClassification(0, ACMatchClassification::NONE)); | |
| 173 match.fill_into_edit = | |
| 174 AutocompleteInput::FormattedStringWithEquivalentMeaning(url, | |
| 175 match.contents); | |
| 176 match.description = title; | |
| 177 match.description_class = | |
| 178 ClassificationsFromMatch(title_match.match_positions, | |
| 179 match.description.size()); | |
| 180 match.starred = true; | |
| 181 | |
| 182 // Summary on how a relevance score is determined for the match: | |
| 183 // | |
| 184 // For each term matching within the bookmark (as given by the set of | |
| 185 // match positions in |title_match|) calculate a 'factor', sum up those factors, then
|
Mark P
2012/10/10 22:28:57
I'm confused. I thought these matches were given
mrossetti
2012/10/15 19:22:46
Correct. I've edited and re-edited these comments
| |
| 186 // use the sum to figure out a value between the base score and the maximum | |
| 187 // score. | |
| 188 // | |
| 189 // The relevance calculation is based on: | |
|
Mark P
2012/10/10 22:28:57
Please divide this into section things: things tha
mrossetti
2012/10/15 19:22:46
Done.
| |
| 190 // 1) how much of the bookmark's title has been matched by the input text: | |
| 191 // (term length / title length), | |
|
Mark P
2012/10/10 22:28:57
term length -> total match length
mrossetti
2012/10/15 19:22:46
"term length". I've changed a couple of references
| |
| 192 // 2) where the match occurs within the bookmark's title, giving more | |
| 193 // points for matches that appear earlier in the title: | |
| 194 // ((title length - position of match start) / title_length), and | |
| 195 // 3) how many times the bookmark's URL is referenced by other bookmarks | |
| 196 // where each additional URL reference up to three boosts the score by | |
| 197 // a fixed amount. | |
| 198 // | |
| 199 // Examples of #1 and #2 above: | |
| 200 // 1) Given a bookmark title of 'abcde fghijklm' and two different search | |
| 201 // terms, 'abcde' and 'fghijklm', 'fghijklm' will score higher (with a | |
| 202 // partial factor of 8/14 = 0.571) than 'abcde' (5/14 = 0.357). | |
|
Mark P
2012/10/10 22:28:57
Please revise the text to make it clearer that thi
mrossetti
2012/10/15 19:22:46
I moved the examples to be adjacent to the points
| |
| 203 // 2) Given a bookmark title of 'abcde fghij' and two different search | |
| 204 // terms, 'abcde' and 'fghij', 'abcde' will score higher (a match start | |
| 205 // position of 0 gives a partial factor of (11-0)/11 = 1.000) than | |
| 206 // 'fghij' (a match start position of 6 gives a partial factor of | |
| 207 // (11-6)/11 = 0.455). | |
| 208 // | |
| 209 // See ScoringFunctor, above, for the details on how the factor is calculated. | |
|
Mark P
2012/10/10 22:28:57
Only the first two clauses above go into the facto
mrossetti
2012/10/15 19:22:46
Done.
| |
| 210 ScoringFunctor position_functor = | |
| 211 for_each(title_match.match_positions.begin(), | |
| 212 title_match.match_positions.end(), ScoringFunctor(title.size())); | |
|
Mark P
2012/10/10 22:28:57
Actually, let me change my request. Suppose the b
mrossetti
2012/10/15 19:22:46
There will be two match positions representing bot
Mark P
2012/10/18 18:37:54
Thanks for the explanation.
| |
| 213 const int kBaseBookmarkScore = 900; | |
| 214 const int kMaxBookmarkScore = AutocompleteResult::kLowestDefaultScore - 1; | |
| 215 const double kBookmarkScoreRange = | |
| 216 static_cast<double>(kMaxBookmarkScore - kBaseBookmarkScore); | |
| 217 // It's not likely that GetBookmarksWithTitlesMatching will return overlapping | |
| 218 // matches but let's play it safe. | |
| 219 match.relevance = std::min(kMaxBookmarkScore, | |
| 220 static_cast<int>(position_functor.ScoringFactor() * kBookmarkScoreRange) + | |
| 221 kBaseBookmarkScore); | |
| 222 // Don't waste any time searching for additional referenced URLs if we | |
| 223 // already have a perfect title match. | |
| 224 if (match.relevance >= kMaxBookmarkScore) | |
| 225 return match; | |
| 226 // Boost the score if the bookmark's URL is referenced by other bookmarks. | |
| 227 const int kURLCountBoost[4] = { 0, 75, 125, 150 }; | |
| 228 std::vector<const BookmarkNode*> nodes; | |
| 229 bookmark_model_->GetNodesByURL(url, &nodes); | |
| 230 DCHECK_GE(std::min(arraysize(kURLCountBoost), nodes.size()), 1U); | |
| 231 match.relevance += | |
| 232 kURLCountBoost[std::min(arraysize(kURLCountBoost), nodes.size()) - 1]; | |
| 233 match.relevance = std::min(kMaxBookmarkScore, match.relevance); | |
| 234 return match; | |
| 235 } | |
| 236 | |
| 237 // static | |
| 238 ACMatchClassifications BookmarkProvider::ClassificationsFromMatch( | |
| 239 const Snippet::MatchPositions& positions, | |
| 240 size_t text_length) { | |
| 241 ACMatchClassifications classifications; | |
| 242 if (positions.empty()) { | |
| 243 classifications.push_back( | |
| 244 ACMatchClassification(0, ACMatchClassification::NONE)); | |
| 245 return classifications; | |
| 246 } | |
| 247 | |
| 248 for (Snippet::MatchPositions::const_iterator i = positions.begin(); | |
| 249 i != positions.end(); ++i) { | |
| 250 AutocompleteMatch::ACMatchClassifications new_class; | |
| 251 AutocompleteMatch::ClassifyLocationInString(i->first, i->second - i->first, | |
| 252 text_length, 0, &new_class); | |
| 253 classifications = AutocompleteMatch::MergeClassifications( | |
| 254 classifications, new_class); | |
| 255 } | |
| 256 return classifications; | |
| 257 } | |
| OLD | NEW |