OLD | NEW |
(Empty) | |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 #include "chrome/browser/autocomplete/bookmark_provider.h" |
| 6 |
| 7 #include <algorithm> |
| 8 #include <functional> |
| 9 #include <vector> |
| 10 |
| 11 #include "base/metrics/histogram.h" |
| 12 #include "base/time.h" |
| 13 #include "chrome/browser/autocomplete/autocomplete_result.h" |
| 14 #include "chrome/browser/bookmarks/bookmark_model.h" |
| 15 #include "chrome/browser/bookmarks/bookmark_model_factory.h" |
| 16 #include "chrome/browser/prefs/pref_service.h" |
| 17 #include "chrome/browser/profiles/profile.h" |
| 18 #include "chrome/common/pref_names.h" |
| 19 #include "net/base/net_util.h" |
| 20 |
// Container type for the title matches that
// bookmark_model_->GetBookmarksWithTitlesMatching() fills in (see
// BookmarkProvider::DoAutocomplete()).
typedef std::vector<bookmark_utils::TitleMatch> TitleMatches;
| 22 |
| 23 // BookmarkProvider ------------------------------------------------------------ |
| 24 |
| 25 BookmarkProvider::BookmarkProvider( |
| 26 AutocompleteProviderListener* listener, |
| 27 Profile* profile) |
| 28 : AutocompleteProvider(listener, profile, |
| 29 AutocompleteProvider::TYPE_BOOKMARK), |
| 30 bookmark_model_(NULL) { |
| 31 if (profile) { |
| 32 bookmark_model_ = BookmarkModelFactory::GetForProfile(profile); |
| 33 languages_ = profile_->GetPrefs()->GetString(prefs::kAcceptLanguages); |
| 34 } |
| 35 } |
| 36 |
| 37 void BookmarkProvider::Start(const AutocompleteInput& input, |
| 38 bool minimal_changes) { |
| 39 if (minimal_changes) |
| 40 return; |
| 41 matches_.clear(); |
| 42 |
| 43 // Short-circuit any matching when inline autocompletion is disabled and |
| 44 // we're looking for BEST_MATCH because none of the BookmarkProvider's |
| 45 // matches can score high enough to qualify. |
| 46 if (input.text().empty() || |
| 47 ((input.type() != AutocompleteInput::UNKNOWN) && |
| 48 (input.type() != AutocompleteInput::REQUESTED_URL) && |
| 49 (input.type() != AutocompleteInput::QUERY)) || |
| 50 ((input.matches_requested() == AutocompleteInput::BEST_MATCH) && |
| 51 input.prevent_inline_autocomplete())) |
| 52 return; |
| 53 |
| 54 base::TimeTicks start_time = base::TimeTicks::Now(); |
| 55 DoAutocomplete(input, |
| 56 input.matches_requested() == AutocompleteInput::BEST_MATCH); |
| 57 UMA_HISTOGRAM_TIMES("Autocomplete.BookmarkProviderMatchTime", |
| 58 base::TimeTicks::Now() - start_time); |
| 59 } |
| 60 |
| 61 BookmarkProvider::~BookmarkProvider() {} |
| 62 |
| 63 void BookmarkProvider::DoAutocomplete(const AutocompleteInput& input, |
| 64 bool best_match) { |
| 65 // We may not have a bookmark model for some unit tests. |
| 66 if (!bookmark_model_) |
| 67 return; |
| 68 |
| 69 TitleMatches matches; |
| 70 // Retrieve enough bookmarks so that we have a reasonable probability of |
| 71 // suggesting the one that the user desires. |
| 72 const size_t kMaxBookmarkMatches = 50; |
| 73 |
| 74 // GetBookmarksWithTitlesMatching returns bookmarks matching the user's |
| 75 // search terms using the following rules: |
| 76 // - The search text is broken up into search terms. Each term is searched |
| 77 // for separately. |
| 78 // - Term matches are always performed against the start of a word. 'def' |
| 79 // will match against 'define' but not against 'indefinite'. |
| 80 // - Terms must be at least three characters in length in order to perform |
| 81 // partial word matches. Any term of lesser length will only be used as an |
| 82 // exact match. 'def' will match against 'define' but 'de' will not match. |
| 83 // - A search containing multiple terms will return results with those words |
| 84 // occuring in any order. |
| 85 // - Terms enclosed in quotes comprises a phrase that must match exactly. |
| 86 // - Multiple terms enclosed in quotes will require those exact words in that |
| 87 // exact order to match. |
| 88 // |
| 89 // Note: GetBookmarksWithTitlesMatching() will never return a match span |
| 90 // greater than the length of the title against which it is being matched, |
| 91 // nor can those spans ever overlap because the match spans are coalesced |
| 92 // for all matched terms. |
| 93 // |
| 94 // Please refer to the code for BookmarkIndex::GetBookmarksWithTitlesMatching |
| 95 // for complete details of how title searches are performed against the user's |
| 96 // bookmarks. |
| 97 bookmark_model_->GetBookmarksWithTitlesMatching(input.text(), |
| 98 kMaxBookmarkMatches, |
| 99 &matches); |
| 100 if (matches.empty()) |
| 101 return; // There were no matches. |
| 102 for (TitleMatches::const_iterator i = matches.begin(); i != matches.end(); |
| 103 ++i) { |
| 104 // Create and score the AutocompleteMatch. If its score is 0 then the |
| 105 // match is discarded. |
| 106 AutocompleteMatch match(TitleMatchToACMatch(*i)); |
| 107 if (match.relevance > 0) |
| 108 matches_.push_back(match); |
| 109 } |
| 110 |
| 111 // Sort and clip the resulting matches. |
| 112 size_t max_matches = best_match ? 1 : AutocompleteProvider::kMaxMatches; |
| 113 if (matches_.size() > max_matches) { |
| 114 std::partial_sort(matches_.begin(), matches_.end(), |
| 115 matches_.begin() + max_matches, |
| 116 AutocompleteMatch::MoreRelevant); |
| 117 matches_.resize(max_matches); |
| 118 } else { |
| 119 std::sort(matches_.begin(), matches_.end(), |
| 120 AutocompleteMatch::MoreRelevant); |
| 121 } |
| 122 } |
| 123 |
| 124 namespace { |
| 125 |
| 126 // for_each helper functor that calculates a match factor for each query term |
| 127 // when calculating the final score. |
| 128 // |
| 129 // Calculate a 'factor' from 0.0 to 1.0 based on 1) how much of the bookmark's |
| 130 // title the term matches, and 2) where the match is positioned within the |
| 131 // bookmark's title. A full length match earns a 1.0. A half-length match earns |
| 132 // at most a 0.5 and at least a 0.25. A single character match against a title |
| 133 // that is 100 characters long where the match is at the first character will |
| 134 // earn a 0.01 and at the last character will earn a 0.0001. |
| 135 class ScoringFunctor { |
| 136 public: |
| 137 // |title_length| is the length of the bookmark title against which this |
| 138 // match will be scored. |
| 139 explicit ScoringFunctor(size_t title_length) |
| 140 : title_length_(static_cast<double>(title_length)), |
| 141 scoring_factor_(0.0) { |
| 142 } |
| 143 |
| 144 void operator()(const Snippet::MatchPosition& match) { |
| 145 double term_length = static_cast<double>(match.second - match.first); |
| 146 scoring_factor_ += term_length / title_length_ * |
| 147 (title_length_ - match.first) / title_length_; |
| 148 } |
| 149 |
| 150 double ScoringFactor() { return scoring_factor_; } |
| 151 |
| 152 private: |
| 153 double title_length_; |
| 154 double scoring_factor_; |
| 155 }; |
| 156 |
| 157 } // namespace |
| 158 |
| 159 AutocompleteMatch BookmarkProvider::TitleMatchToACMatch( |
| 160 const bookmark_utils::TitleMatch& title_match) { |
| 161 // The AutocompleteMatch we construct is non-deletable because the only |
| 162 // way to support this would be to delete the underlying bookmark, which is |
| 163 // unlikely to be what the user intends. |
| 164 AutocompleteMatch match(this, 0, false, AutocompleteMatch::BOOKMARK_TITLE); |
| 165 const string16& title(title_match.node->GetTitle()); |
| 166 DCHECK(!title.empty()); |
| 167 const GURL& url(title_match.node->url()); |
| 168 match.destination_url = url; |
| 169 match.contents = net::FormatUrl(url, languages_, |
| 170 net::kFormatUrlOmitAll & net::kFormatUrlOmitHTTP, |
| 171 net::UnescapeRule::SPACES, NULL, NULL, NULL); |
| 172 match.contents_class.push_back( |
| 173 ACMatchClassification(0, ACMatchClassification::NONE)); |
| 174 match.fill_into_edit = |
| 175 AutocompleteInput::FormattedStringWithEquivalentMeaning(url, |
| 176 match.contents); |
| 177 match.description = title; |
| 178 match.description_class = |
| 179 ClassificationsFromMatch(title_match.match_positions, |
| 180 match.description.size()); |
| 181 match.starred = true; |
| 182 |
| 183 // Summary on how a relevance score is determined for the match: |
| 184 // |
| 185 // For each term matching within the bookmark's title (as given by the set of |
| 186 // Snippet::MatchPositions) calculate a 'factor', sum up those factors, then |
| 187 // use the sum to figure out a value between the base score and the maximum |
| 188 // score. |
| 189 // |
| 190 // The factor for each term is calculated based on: |
| 191 // |
| 192 // 1) how much of the bookmark's title has been matched by the term: |
| 193 // (term length / title length). |
| 194 // |
| 195 // Example: Given a bookmark title 'abcde fghijklm', with a title length |
| 196 // of 14, and two different search terms, 'abcde' and 'fghijklm', with |
| 197 // term lengths of 5 and 8, respectively, 'fghijklm' will score higher |
| 198 // (with a partial factor of 8/14 = 0.571) than 'abcde' (5/14 = 0.357). |
| 199 // |
| 200 // 2) where the term match occurs within the bookmark's title, giving more |
| 201 // points for matches that appear earlier in the title: |
| 202 // ((title length - position of match start) / title_length). |
| 203 // |
| 204 // Example: Given a bookmark title of 'abcde fghijklm', with a title length |
| 205 // of 14, and two different search terms, 'abcde' and 'fghij', with |
| 206 // start positions of 0 and 6, respectively, 'abcde' will score higher |
| 207 // (with a a partial factor of (14-0)/14 = 1.000 ) than 'fghij' (with |
| 208 // a partial factor of (14-6)/14 = 0.571 ). |
| 209 // |
| 210 // Once all term factors have been calculated they are summed. The resulting |
| 211 // sum will never be greater than 1.0. This sum is then multiplied against |
| 212 // the scoring range available, which is 299. The 299 is calculated by |
| 213 // subtracting the minimum possible score, 900, from the maximum possible |
| 214 // score, 1199. This product, ranging from 0 to 299, is added to the minimum |
| 215 // possible score, 900, giving the preliminary score. |
| 216 // |
| 217 // If the preliminary score is less than the maximum possible score, 1199, |
| 218 // it can be boosted up to that maximum possible score if the URL referenced |
| 219 // by the bookmark is also referenced by any of the user's other bookmarks. |
| 220 // A count of how many times the bookmark's URL is referenced is determined |
| 221 // and, for each additional reference beyond the one for the bookmark being |
| 222 // scored up to a maximum of three, the score is boosted by a fixed amount |
| 223 // given by |kURLCountBoost|, below. |
| 224 // |
| 225 ScoringFunctor position_functor = |
| 226 for_each(title_match.match_positions.begin(), |
| 227 title_match.match_positions.end(), ScoringFunctor(title.size())); |
| 228 const int kBaseBookmarkScore = 900; |
| 229 const int kMaxBookmarkScore = AutocompleteResult::kLowestDefaultScore - 1; |
| 230 const double kBookmarkScoreRange = |
| 231 static_cast<double>(kMaxBookmarkScore - kBaseBookmarkScore); |
| 232 // It's not likely that GetBookmarksWithTitlesMatching will return overlapping |
| 233 // matches but let's play it safe. |
| 234 match.relevance = std::min(kMaxBookmarkScore, |
| 235 static_cast<int>(position_functor.ScoringFactor() * kBookmarkScoreRange) + |
| 236 kBaseBookmarkScore); |
| 237 // Don't waste any time searching for additional referenced URLs if we |
| 238 // already have a perfect title match. |
| 239 if (match.relevance >= kMaxBookmarkScore) |
| 240 return match; |
| 241 // Boost the score if the bookmark's URL is referenced by other bookmarks. |
| 242 const int kURLCountBoost[4] = { 0, 75, 125, 150 }; |
| 243 std::vector<const BookmarkNode*> nodes; |
| 244 bookmark_model_->GetNodesByURL(url, &nodes); |
| 245 DCHECK_GE(std::min(arraysize(kURLCountBoost), nodes.size()), 1U); |
| 246 match.relevance += |
| 247 kURLCountBoost[std::min(arraysize(kURLCountBoost), nodes.size()) - 1]; |
| 248 match.relevance = std::min(kMaxBookmarkScore, match.relevance); |
| 249 return match; |
| 250 } |
| 251 |
| 252 // static |
| 253 ACMatchClassifications BookmarkProvider::ClassificationsFromMatch( |
| 254 const Snippet::MatchPositions& positions, |
| 255 size_t text_length) { |
| 256 ACMatchClassifications classifications; |
| 257 if (positions.empty()) { |
| 258 classifications.push_back( |
| 259 ACMatchClassification(0, ACMatchClassification::NONE)); |
| 260 return classifications; |
| 261 } |
| 262 |
| 263 for (Snippet::MatchPositions::const_iterator i = positions.begin(); |
| 264 i != positions.end(); ++i) { |
| 265 AutocompleteMatch::ACMatchClassifications new_class; |
| 266 AutocompleteMatch::ClassifyLocationInString(i->first, i->second - i->first, |
| 267 text_length, 0, &new_class); |
| 268 classifications = AutocompleteMatch::MergeClassifications( |
| 269 classifications, new_class); |
| 270 } |
| 271 return classifications; |
| 272 } |
OLD | NEW |