Chromium Code Reviews| Index: chrome/browser/autocomplete/bookmark_provider.cc |
| =================================================================== |
| --- chrome/browser/autocomplete/bookmark_provider.cc (revision 0) |
| +++ chrome/browser/autocomplete/bookmark_provider.cc (revision 0) |
| @@ -0,0 +1,257 @@ |
| +// Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| +// Use of this source code is governed by a BSD-style license that can be |
| +// found in the LICENSE file. |
| + |
| +#include "chrome/browser/autocomplete/bookmark_provider.h" |
| + |
| +#include <algorithm> |
| +#include <functional> |
| +#include <vector> |
| + |
| +#include "base/metrics/histogram.h" |
| +#include "base/time.h" |
| +#include "chrome/browser/autocomplete/autocomplete_result.h" |
| +#include "chrome/browser/bookmarks/bookmark_model.h" |
| +#include "chrome/browser/bookmarks/bookmark_model_factory.h" |
| +#include "chrome/browser/prefs/pref_service.h" |
| +#include "chrome/browser/profiles/profile.h" |
| +#include "chrome/common/pref_names.h" |
| +#include "net/base/net_util.h" |
| + |
| +// Shorthand for the list of bookmark title matches that DoAutocomplete() has |
| +// the bookmark model fill in. |
| +typedef std::vector<bookmark_utils::TitleMatch> TitleMatches; |
| + |
| +// BookmarkProvider ------------------------------------------------------------ |
| + |
| +// Builds a bookmark-title autocomplete provider. When a profile is supplied, |
| +// the provider looks up that profile's bookmark model and reads the |
| +// kAcceptLanguages preference; otherwise bookmark_model_ stays NULL and |
| +// languages_ is not assigned here. |
| +BookmarkProvider::BookmarkProvider(AutocompleteProviderListener* listener, |
| +                                   Profile* profile) |
| +    : AutocompleteProvider(listener, |
| +                           profile, |
| +                           AutocompleteProvider::TYPE_BOOKMARK), |
| +      bookmark_model_(NULL) { |
| +  // Nothing more to set up without a profile. |
| +  if (!profile) |
| +    return; |
| + |
| +  bookmark_model_ = BookmarkModelFactory::GetForProfile(profile); |
| +  languages_ = profile_->GetPrefs()->GetString(prefs::kAcceptLanguages); |
| +} |
| + |
| +// Rebuilds matches_ for the given input and records how long the lookup took |
| +// in the "Autocomplete.BookmarkProviderMatchTime" histogram. When |
| +// minimal_changes is true the existing matches_ are left untouched. |
| +void BookmarkProvider::Start(const AutocompleteInput& input, |
| + bool minimal_changes) { |
| + if (minimal_changes) |
| + return; |
| + matches_.clear(); |
| + |
| + // Bail out with no matches when the input text is empty, when the input |
| + // type is anything other than UNKNOWN, REQUESTED_URL or QUERY, or when only |
| + // the single best match is requested while inline autocompletion is being |
| + // prevented. |
| + if (input.text().empty() || |
| + ((input.type() != AutocompleteInput::UNKNOWN) && |
| + (input.type() != AutocompleteInput::REQUESTED_URL) && |
| + (input.type() != AutocompleteInput::QUERY)) || |
| + (input.matches_requested() == AutocompleteInput::BEST_MATCH && |

Peter Kasting
2012/10/10 18:29:06
Nit: Be consistent about whether boolean subexpres
mrossetti
2012/10/15 19:22:46
Done.
|
| + input.prevent_inline_autocomplete())) |

Peter Kasting
2012/10/10 18:29:06
Nit: Might want a comment about why we're bailing
mrossetti
2012/10/15 19:22:46
Done.
|
| + return; |
| + |
| + // Time only the matching work itself. |
| + base::TimeTicks start_time = base::TimeTicks::Now(); |
| + DoAutocomplete(input, |
| + input.matches_requested() == AutocompleteInput::BEST_MATCH); |
| + UMA_HISTOGRAM_TIMES("Autocomplete.BookmarkProviderMatchTime", |
| + base::TimeTicks::Now() - start_time); |
| +} |
| + |
| +// The destructor body is empty; no explicit cleanup is performed here. |
| +BookmarkProvider::~BookmarkProvider() {} |
| + |
| +// Looks up bookmarks whose titles match input.text(), converts each hit into |
| +// an AutocompleteMatch, keeps the ones with a relevance above zero, and leaves |
| +// matches_ ordered by AutocompleteMatch::MoreRelevant. When best_match is true |
| +// at most one match is kept; otherwise at most |
| +// AutocompleteProvider::kMaxMatches are kept. |
| +void BookmarkProvider::DoAutocomplete(const AutocompleteInput& input, |
| + bool best_match) { |
| + // We may not have a bookmark model for some unit tests. |
| + if (!bookmark_model_) |
| + return; |
| + |
| + TitleMatches matches; |
| + // Retrieve enough bookmarks so that we have a reasonable probability of |
| + // retrieving the one that the user desires. |
| + const size_t kMaxBookmarkMatches = 50; |
| + |
| + // GetBookmarksWithTitlesMatching returns bookmarks matching the user's |
| + // search terms using the following rules: |
| + // - The search text is broken up into search terms. Each term is searched |
| + // for separately. |
| + // - Term matches are always performed against the start of a word. 'def' |
| + // will match against 'define' but not against 'indefinite'. |
| + // - Terms must be at least three characters in length in order to perform |
| + // partial word matches. Any term of lesser length will only be used as an |
| + // exact match. 'def' will match against 'define' but 'de' will not match. |
| + // - A search containing multiple terms will return results with those words |
| + // occurring in any order. |
| + // - Terms enclosed in quotes comprise a phrase that must match exactly. |
| + // - Multiple terms enclosed in quotes will require those exact words in that |
| + // exact order to match. |
| + // |
| + // Note that the AutocompleteMatch::ACMatchClassification for each match |

Mark P
2012/10/10 22:28:57
I don't see how this paragraph is relevant here.
mrossetti
2012/10/15 19:22:46
Reworded.
|
| + // can never result in a match span greater than the length of the title |
| + // against which it is being matched nor can those spans ever overlap because |
| + // GetBookmarksWithTitlesMatching compresses and combines the spans for all |
| + // terms. |
| + // |
| + // Please refer to the code for BookmarkIndex::GetBookmarksWithTitlesMatching |
| + // for complete details of how title searches are performed against the user's |
| + // bookmarks. |
| + bookmark_model_->GetBookmarksWithTitlesMatching(input.text(), |
| + kMaxBookmarkMatches, |
| + &matches); |
| + if (matches.empty()) |
| + return; // There were no matches. |
| + // Convert every title match; a relevance of zero means the match is dropped. |
| + for (TitleMatches::const_iterator i = matches.begin(); i != matches.end(); |
| + ++i) { |
| + AutocompleteMatch match(TitleMatchToACMatch(*i)); |
| + if (match.relevance > 0) |
| + matches_.push_back(match); |
| + } |
| + |
| + // Sort and clip the resulting matches. |
| + size_t max_matches = best_match ? 1 : AutocompleteProvider::kMaxMatches; |
| + if (matches_.size() > max_matches) { |
| + // Only the leading max_matches entries need to be ordered before the rest |
| + // are discarded. |
| + std::partial_sort(matches_.begin(), matches_.end(), |
| + matches_.begin() + max_matches, |
| + AutocompleteMatch::MoreRelevant); |
| + matches_.resize(max_matches); |
| + } else { |
| + std::sort(matches_.begin(), matches_.end(), |
| + AutocompleteMatch::MoreRelevant); |
| + } |
| +} |
| + |
| +namespace { |
| + |
| +// for_each helper functor that calculates a match factor for each query term |
| +// when calculating the final score. |
| +// |
| +// Calculate a 'factor' from 0.0 to 1.0 based on 1) how much of the bookmark's |
| +// title the term matches, and 2) where the match is positioned within the |
| +// bookmark's title. A full length match earns a 1.0. A half-length match earns |
| +// at most a 0.5 and at least a 0.25. A single character match against a title |
| +// that is 100 characters long where the match is at the first character will |
| +// earn a 0.01 and at the last character will earn a 0.0001. |
| +// |
| +// The factors of all matches passed to operator() are summed; the running |
| +// total is read back through ScoringFactor(). |
| +class ScoringFunctor { |
| + public: |
| + // |title_length| is the length of the bookmark title against which this |
| + // match will be scored. |
| + // NOTE(review): title_length_ is used as a divisor in operator(); the only |
| + // visible caller, TitleMatchToACMatch, merely DCHECKs that the title is |
| + // non-empty. |
| + explicit ScoringFunctor(size_t title_length) |
| + : title_length_(static_cast<double>(title_length)), |
| + scoring_factor_(0.0) { |
| + } |
| + |
| + // Adds the factor for one match: (match length / title length) multiplied by |
| + // ((title length - match start) / title length). The match length is taken |
| + // as (match.second - match.first) and the match start as match.first. |
| + void operator()(const Snippet::MatchPosition& match) { |
| + double term_length = static_cast<double>(match.second - match.first); |
| + scoring_factor_ += term_length / title_length_ * |
| + (title_length_ - match.first) / title_length_; |

Peter Kasting
2012/10/10 18:29:06
Nit: Indent 4 (only function args get indented eve
mrossetti
2012/10/15 19:22:46
Done.
|
| + } |
| + |
| + // Returns the sum of the factors accumulated so far. |
| + double ScoringFactor() { return scoring_factor_; } |
| + |
| + private: |
| + double title_length_; |
| + double scoring_factor_; |
| +}; |
| + |
| +} // namespace |
| + |
| +// Converts one bookmark title match into an AutocompleteMatch. The result's |
| +// destination is the bookmark's URL, its contents are the formatted URL, its |
| +// description is the bookmark title (with the matched ranges classified), and |
| +// its relevance is computed as described in the body below. |
| +AutocompleteMatch BookmarkProvider::TitleMatchToACMatch( |
| + const bookmark_utils::TitleMatch& title_match) { |
| + // Compose a match that has the URL of the bookmark and the bookmark's title, |
| + // not the URL's page title, as the description. Note that if the relevance |
| + // is never changed from 0 that the match will be discarded. false is passed |

Peter Kasting
2012/10/10 18:29:06
Nit: Kill passive voice: "The AutocompleteMatch we
mrossetti
2012/10/15 19:22:46
Done.
|
| + // for the |deletable| parameter in the following constructor to indicate |
| + // that the user cannot delete the autocomplete suggestion and, thus, |
| + // the bookmark. |
| + AutocompleteMatch match(this, 0, false, AutocompleteMatch::BOOKMARK_TITLE); |
| + const string16& title(title_match.node->GetTitle()); |
| + DCHECK(!title.empty()); |
| + const GURL& url(title_match.node->url()); |
| + match.destination_url = url; |
| + // NOTE(review): if kFormatUrlOmitAll is the union of the individual omit |
| + // flags (presumably; confirm in net_util.h), the expression |
| + // "kFormatUrlOmitAll & kFormatUrlOmitHTTP" reduces to kFormatUrlOmitHTTP |
| + // alone. Confirm whether "& ~kFormatUrlOmitHTTP" or plain kFormatUrlOmitAll |
| + // was intended. |
| + match.contents = net::FormatUrl(url, languages_, |
| + net::kFormatUrlOmitAll & net::kFormatUrlOmitHTTP, |
| + net::UnescapeRule::SPACES, NULL, NULL, NULL); |
| + match.contents_class.push_back( |
| + ACMatchClassification(0, ACMatchClassification::NONE)); |
| + match.fill_into_edit = |
| + AutocompleteInput::FormattedStringWithEquivalentMeaning(url, |
| + match.contents); |
| + match.description = title; |
| + match.description_class = |
| + ClassificationsFromMatch(title_match.match_positions, |
| + match.description.size()); |
| + match.starred = true; |
| + |
| + // Summary on how a relevance score is determined for the match: |
| + // |
| + // For each term matching within the bookmark (as given by |
| + // title_match.match_positions) calculate a 'factor', sum up those factors, then |

Mark P
2012/10/10 22:28:57
I'm confused. I thought these matches were given
mrossetti
2012/10/15 19:22:46
Correct. I've edited and re-edited these comments
|
| + // use the sum to figure out a value between the base score and the maximum |
| + // score. |
| + // |
| + // The relevance calculation is based on: |

Mark P
2012/10/10 22:28:57
Please divide this into section things: things tha
mrossetti
2012/10/15 19:22:46
Done.
|
| + // 1) how much of the bookmark's title has been matched by the input text: |
| + // (term length / title length), |

Mark P
2012/10/10 22:28:57
term length -> total match length
mrossetti
2012/10/15 19:22:46
"term length". I've changed a couple of references
|
| + // 2) where the match occurs within the bookmark's title, giving more |
| + // points for matches that appear earlier in the title: |
| + // ((title length - position of match start) / title_length), and |
| + // 3) how many times the bookmark's URL is referenced by other bookmarks |
| + // where each additional URL reference up to three boosts the score by |
| + // the amount listed in kURLCountBoost below. |
| + // |
| + // Examples of #1 and #2 above: |
| + // 1) Given a bookmark title of 'abcde fghijklm' and two different search |
| + // terms, 'abcde' and 'fghijklm', 'fghijklm' will score higher (with a |
| + // partial factor of 8/14 = 0.571) than 'abcde' (5/14 = 0.357). |

Mark P
2012/10/10 22:28:57
Please revise the text to make it clearer that thi
mrossetti
2012/10/15 19:22:46
I moved the examples to be adjacent to the points
|
| + // 2) Given a bookmark title of 'abcde fghij' and two different search |
| + // terms, 'abcde' and 'fghij', 'abcde' will score higher (a match start |
| + // position of 0 gives a partial factor of (11-0)/11 = 1.000) than |
| + // 'fghij' (a match start position of 6 gives a partial factor of |
| + // (11-6)/11 = 0.455). |
| + // |
| + // See ScoringFunctor, above, for the details on how the factor is calculated. |

Mark P
2012/10/10 22:28:57
Only the first two clauses above go into the facto
mrossetti
2012/10/15 19:22:46
Done.
|
| + ScoringFunctor position_functor = |
| + for_each(title_match.match_positions.begin(), |
| + title_match.match_positions.end(), ScoringFunctor(title.size())); |

Mark P
2012/10/10 22:28:57
Actually, let me change my request. Suppose the b
mrossetti
2012/10/15 19:22:46
There will be two match positions representing bot
Mark P
2012/10/18 18:37:54
Thanks for the explanation.
|
| + const int kBaseBookmarkScore = 900; |
| + const int kMaxBookmarkScore = AutocompleteResult::kLowestDefaultScore - 1; |
| + const double kBookmarkScoreRange = |
| + static_cast<double>(kMaxBookmarkScore - kBaseBookmarkScore); |
| + // It's not likely that GetBookmarksWithTitlesMatching will return overlapping |
| + // matches but let's play it safe. |
| + match.relevance = std::min(kMaxBookmarkScore, |
| + static_cast<int>(position_functor.ScoringFactor() * kBookmarkScoreRange) + |
| + kBaseBookmarkScore); |
| + // Don't waste any time searching for additional referenced URLs if we |
| + // already have a perfect title match. |
| + if (match.relevance >= kMaxBookmarkScore) |
| + return match; |
| + // Boost the score if the bookmark's URL is referenced by other bookmarks. |
| + // The boost is looked up by min(4, number of nodes found for the URL) - 1, |
| + // so a single node adds 0 and four or more nodes add 150. The DCHECK below |
| + // requires at least one node (presumably the bookmark being scored itself; |
| + // confirm against GetNodesByURL). |
| + const int kURLCountBoost[4] = { 0, 75, 125, 150 }; |
| + std::vector<const BookmarkNode*> nodes; |
| + bookmark_model_->GetNodesByURL(url, &nodes); |
| + DCHECK_GE(std::min(arraysize(kURLCountBoost), nodes.size()), 1U); |
| + match.relevance += |
| + kURLCountBoost[std::min(arraysize(kURLCountBoost), nodes.size()) - 1]; |
| + // Never let the boosted score exceed the maximum bookmark score. |
| + match.relevance = std::min(kMaxBookmarkScore, match.relevance); |
| + return match; |
| +} |
| + |
| +// static |
| +// Produces the classifications for a string of text_length characters whose |
| +// matched regions are listed in positions. Each entry contributes a region |
| +// that starts at its first member and spans (second - first) characters; the |
| +// per-entry classifications are merged into one list. When positions is empty |
| +// the result is a single NONE classification at offset 0. |
| +ACMatchClassifications BookmarkProvider::ClassificationsFromMatch( |
| +    const Snippet::MatchPositions& positions, |
| +    size_t text_length) { |
| +  ACMatchClassifications merged; |
| +  if (!positions.empty()) { |
| +    Snippet::MatchPositions::const_iterator pos = positions.begin(); |
| +    while (pos != positions.end()) { |
| +      // Classify this one region, then fold it into the running result. |
| +      AutocompleteMatch::ACMatchClassifications region_classes; |
| +      AutocompleteMatch::ClassifyLocationInString( |
| +          pos->first, pos->second - pos->first, text_length, 0, |
| +          &region_classes); |
| +      merged = |
| +          AutocompleteMatch::MergeClassifications(merged, region_classes); |
| +      ++pos; |
| +    } |
| +    return merged; |
| +  } |
| + |
| +  // Nothing matched: the whole string is unstyled. |
| +  merged.push_back(ACMatchClassification(0, ACMatchClassification::NONE)); |
| +  return merged; |
| +} |
| Property changes on: chrome/browser/autocomplete/bookmark_provider.cc |
| ___________________________________________________________________ |
| Added: svn:eol-style |
| + LF |