Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(410)

Unified Diff: components/omnibox/browser/shortcuts_provider.cc

Issue 1877833002: Optimize shortcuts provider (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Fixes after review, round 2 Created 4 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: components/omnibox/browser/shortcuts_provider.cc
diff --git a/components/omnibox/browser/shortcuts_provider.cc b/components/omnibox/browser/shortcuts_provider.cc
index 22ee0c71543b97dc00a86e9acc4582fa098805e2..a4ab5f7f40eeea70a7942b3265ce6775cbf0262a 100644
--- a/components/omnibox/browser/shortcuts_provider.cc
+++ b/components/omnibox/browser/shortcuts_provider.cc
@@ -28,6 +28,7 @@
#include "components/omnibox/browser/autocomplete_provider_client.h"
#include "components/omnibox/browser/autocomplete_result.h"
#include "components/omnibox/browser/history_provider.h"
+#include "components/omnibox/browser/match_compare.h"
#include "components/omnibox/browser/omnibox_field_trial.h"
#include "components/omnibox/browser/url_prefix.h"
#include "components/prefs/pref_service.h"
@@ -51,6 +52,15 @@ class DestinationURLEqualsURL {
const int ShortcutsProvider::kShortcutsProviderDefaultMaxRelevance = 1199;
+ShortcutsProvider::ShortcutMatch::ShortcutMatch(
+ int relevance, const GURL& stripped_destination_url,
+ const ShortcutsDatabase::Shortcut* shortcut) :
Peter Kasting 2016/04/14 23:52:33 Nit: ':' goes at the beginning of the next line, n
Alexander Yashkin 2016/04/15 09:14:14 Done.
+ relevance(relevance),
Peter Kasting 2016/04/14 23:52:33 Nit: These lines are not sufficiently indented
Alexander Yashkin 2016/04/15 09:14:15 Done.
+ stripped_destination_url(stripped_destination_url),
+ shortcut(shortcut),
+ contents(shortcut->match_core.contents),
+ type(static_cast<AutocompleteMatch::Type>(shortcut->match_core.type)) {}
+
ShortcutsProvider::ShortcutsProvider(AutocompleteProviderClient* client)
: AutocompleteProvider(AutocompleteProvider::TYPE_SHORTCUTS),
client_(client),
@@ -118,6 +128,134 @@ ShortcutsProvider::~ShortcutsProvider() {
backend->RemoveObserver(this);
}
+// static
Peter Kasting 2016/04/14 23:52:33 Nit: Might want to move these functions up in a se
Alexander Yashkin 2016/04/15 09:14:14 Reverted move.
+ShortcutsProvider::WordMap ShortcutsProvider::CreateWordMapForString(
+ const base::string16& text) {
+ // First, convert |text| to a vector of the unique words in it.
+ WordMap word_map;
+ base::i18n::BreakIterator word_iter(text,
+ base::i18n::BreakIterator::BREAK_WORD);
+ if (!word_iter.Init())
+ return word_map;
+ std::vector<base::string16> words;
+ while (word_iter.Advance()) {
+ if (word_iter.IsWord())
+ words.push_back(word_iter.GetString());
+ }
+ if (words.empty())
+ return word_map;
+ std::sort(words.begin(), words.end());
+ words.erase(std::unique(words.begin(), words.end()), words.end());
+
+ // Now create a map from (first character) to (words beginning with that
+ // character). We insert in reverse lexicographical order and rely on the
+ // multimap preserving insertion order for values with the same key. (This
+ // is mandated in C++11, and part of that decision was based on a survey of
+ // existing implementations that found that it was already true everywhere.)
+ std::reverse(words.begin(), words.end());
+ for (std::vector<base::string16>::const_iterator i(words.begin());
+ i != words.end(); ++i)
+ word_map.insert(std::make_pair((*i)[0], *i));
+ return word_map;
+}
+
+// static
+ACMatchClassifications ShortcutsProvider::ClassifyAllMatchesInString(
+ const base::string16& find_text,
+ const WordMap& find_words,
+ const base::string16& text,
+ const ACMatchClassifications& original_class) {
+ DCHECK(!find_text.empty());
+ DCHECK(!find_words.empty());
+
+ // The code below assumes |text| is nonempty and therefore the resulting
+ // classification vector should always be nonempty as well. Returning early
+ // if |text| is empty assures we'll return the (correct) empty vector rather
+ // than a vector with a single (0, NONE) match.
+ if (text.empty())
+ return original_class;
+
+ // First check whether |text| begins with |find_text| and mark that whole
+ // section as a match if so.
+ base::string16 text_lowercase(base::i18n::ToLower(text));
+ ACMatchClassifications match_class;
+ size_t last_position = 0;
+ if (base::StartsWith(text_lowercase, find_text,
+ base::CompareCase::SENSITIVE)) {
+ match_class.push_back(
+ ACMatchClassification(0, ACMatchClassification::MATCH));
+ last_position = find_text.length();
+ // If |text_lowercase| is actually equal to |find_text|, we don't need to
+ // (and in fact shouldn't) put a trailing NONE classification after the end
+ // of the string.
+ if (last_position < text_lowercase.length()) {
+ match_class.push_back(
+ ACMatchClassification(last_position, ACMatchClassification::NONE));
+ }
+ } else {
+ // |match_class| should start at position 0. If the first matching word is
+ // found at position 0, this will be popped from the vector further down.
+ match_class.push_back(
+ ACMatchClassification(0, ACMatchClassification::NONE));
+ }
+
+ // Now, starting with |last_position|, check each character in
+ // |text_lowercase| to see if we have words starting with that character in
+ // |find_words|. If so, check each of them to see if they match the portion
+ // of |text_lowercase| beginning with |last_position|. Accept the first
+ // matching word found (which should be the longest possible match at this
+ // location, given the construction of |find_words|) and add a MATCH region to
+ // |match_class|, moving |last_position| to be after the matching word. If we
+ // found no matching words, move to the next character and repeat.
+ while (last_position < text_lowercase.length()) {
+ std::pair<WordMap::const_iterator, WordMap::const_iterator> range(
+ find_words.equal_range(text_lowercase[last_position]));
+ size_t next_character = last_position + 1;
+ for (WordMap::const_iterator i(range.first); i != range.second; ++i) {
+ const base::string16& word = i->second;
+ size_t word_end = last_position + word.length();
+ if ((word_end <= text_lowercase.length()) &&
+ !text_lowercase.compare(last_position, word.length(), word)) {
+ // Collapse adjacent ranges into one.
+ if (match_class.back().offset == last_position)
+ match_class.pop_back();
+
+ AutocompleteMatch::AddLastClassificationIfNecessary(
+ &match_class, last_position, ACMatchClassification::MATCH);
+ if (word_end < text_lowercase.length()) {
+ match_class.push_back(
+ ACMatchClassification(word_end, ACMatchClassification::NONE));
+ }
+ last_position = word_end;
+ break;
+ }
+ }
+ last_position = std::max(last_position, next_character);
+ }
+
+ return AutocompleteMatch::MergeClassifications(original_class, match_class);
+}
+
+// static
+void ShortcutsProvider::SortAndDedupMatches(
+ OmniboxEventProto::PageClassification page_classification,
+ std::vector<ShortcutMatch>* matches) {
+ // Sort matches such that duplicate matches are consecutive.
+ std::sort(matches->begin(), matches->end(),
+ DestinationSort<ShortcutMatch>(page_classification));
+
+ // Erase duplicate matches. Duplicate matches are those with
+ // stripped_destination_url fields equal and non empty.
+ matches->erase(
+ std::unique(matches->begin(), matches->end(),
+ [](const ShortcutMatch& elem1, const ShortcutMatch& elem2) {
+ return !elem1.stripped_destination_url.is_empty() &&
+ (elem1.stripped_destination_url ==
+ elem2.stripped_destination_url);
+ }),
+ matches->end());
+}
+
void ShortcutsProvider::OnShortcutsLoaded() {
initialized_ = true;
}
@@ -138,6 +276,8 @@ void ShortcutsProvider::GetMatches(const AutocompleteInput& input) {
max_relevance = kShortcutsProviderDefaultMaxRelevance;
TemplateURLService* template_url_service = client_->GetTemplateURLService();
const base::string16 fixed_up_input(FixupUserInput(input).second);
+
+ std::vector<ShortcutMatch> shortcut_matches;
for (ShortcutsBackend::ShortcutMap::const_iterator it =
FindFirstMatch(term_string, backend.get());
it != backend->shortcuts_map().end() &&
@@ -147,16 +287,21 @@ void ShortcutsProvider::GetMatches(const AutocompleteInput& input) {
// Don't return shortcuts with zero relevance.
int relevance = CalculateScore(term_string, it->second, max_relevance);
if (relevance) {
- matches_.push_back(
- ShortcutToACMatch(it->second, relevance, input, fixed_up_input));
- matches_.back().ComputeStrippedDestinationURL(
- input, client_->GetAcceptLanguages(), template_url_service);
+ const ShortcutsDatabase::Shortcut& shortcut = it->second;
+ GURL stripped_destination_url(AutocompleteMatch::GURLToStrippedGURL(
+ shortcut.match_core.destination_url,
+ input,
+ languages_,
+ template_url_service,
+ shortcut.match_core.keyword));
+ shortcut_matches.push_back(
+ ShortcutMatch(relevance, stripped_destination_url, &(it->second)));
Peter Kasting 2016/04/14 23:52:33 Nit: Parens around -> operator not necessary
Alexander Yashkin 2016/04/15 09:14:14 Done.
}
}
// Remove duplicates. This is important because it's common to have multiple
// shortcuts pointing to the same URL, e.g., ma, mai, and mail all pointing
// to mail.google.com, so typing "m" will return them all. If we then simply
- // clamp to kMaxMatches and let the AutocompleteResult take care of
+ // clamp to kMaxMatches and let the SortAndDedupMatches take care of
// collapsing the duplicates, we'll effectively only be returning one match,
// instead of several possibilities.
//
@@ -164,37 +309,53 @@ void ShortcutsProvider::GetMatches(const AutocompleteInput& input) {
// |duplicate_matches| field--duplicates don't need to be preserved in the
// matches because they are only used for deletions, and this provider
// deletes matches based on the URL.
- AutocompleteResult::DedupMatchesByDestination(
- input.current_page_classification(), false, &matches_);
+ SortAndDedupMatches(input.current_page_classification(), &shortcut_matches);
+
// Find best matches.
std::partial_sort(
- matches_.begin(),
- matches_.begin() +
- std::min(AutocompleteProvider::kMaxMatches, matches_.size()),
- matches_.end(), &AutocompleteMatch::MoreRelevant);
- if (matches_.size() > AutocompleteProvider::kMaxMatches) {
- matches_.erase(matches_.begin() + AutocompleteProvider::kMaxMatches,
- matches_.end());
+ shortcut_matches.begin(),
+ shortcut_matches.begin() +
+ std::min(AutocompleteProvider::kMaxMatches, shortcut_matches.size()),
+ shortcut_matches.end(),
+ [](const ShortcutMatch& elem1, const ShortcutMatch& elem2) {
+ // For equal-relevance matches, we sort alphabetically, so that
+ // providers who return multiple elements at the same priority get a
+ // "stable" sort across multiple updates.
Peter Kasting 2016/04/14 23:52:33 Nit: This comment is a bit odd since this code is
Alexander Yashkin 2016/04/15 09:14:14 Done.
+ return elem1.relevance == elem2.relevance ?
+ elem1.contents < elem2.contents :
+ elem1.relevance > elem2.relevance;
+ });
+ if (shortcut_matches.size() > AutocompleteProvider::kMaxMatches) {
+ shortcut_matches.erase(
+ shortcut_matches.begin() + AutocompleteProvider::kMaxMatches,
+ shortcut_matches.end());
}
- // Guarantee that all scores are decreasing (but do not assign any scores
- // below 1).
- for (ACMatches::iterator it = matches_.begin(); it != matches_.end(); ++it) {
- max_relevance = std::min(max_relevance, it->relevance);
- it->relevance = max_relevance;
+ // Create and initialize autocomplete matches from shortcut matches.
Peter Kasting 2016/04/14 23:52:33 Nit: Wrap all comment lines as close to 80 columns
Alexander Yashkin 2016/04/15 09:14:14 Done.
+ // Also guarantee that all relevance scores are decreasing
+ // (but do not assign any scores below 1).
+ WordMap terms_map(CreateWordMapForString(term_string));
+ matches_.reserve(shortcut_matches.size());
+ for (ShortcutMatch& match : shortcut_matches) {
+ max_relevance = std::min(max_relevance, match.relevance);
+ match.relevance = max_relevance;
Peter Kasting 2016/04/14 23:52:33 Rather than update match.relevance, it would be be
Alexander Yashkin 2016/04/15 09:14:14 Done.
if (max_relevance > 1)
--max_relevance;
+ matches_.push_back(ShortcutMatchToACMatch(match, input, fixed_up_input,
+ term_string, terms_map));
}
}
-AutocompleteMatch ShortcutsProvider::ShortcutToACMatch(
- const ShortcutsDatabase::Shortcut& shortcut,
- int relevance,
+AutocompleteMatch ShortcutsProvider::ShortcutMatchToACMatch(
+ const ShortcutMatch& shortcut_match,
const AutocompleteInput& input,
- const base::string16& fixed_up_input_text) {
+ const base::string16& fixed_up_input_text,
+ const base::string16 term_string,
+ const WordMap& terms_map) {
DCHECK(!input.text().empty());
+ const ShortcutsDatabase::Shortcut& shortcut = *shortcut_match.shortcut;
AutocompleteMatch match;
match.provider = this;
- match.relevance = relevance;
+ match.relevance = shortcut_match.relevance;
match.deletable = true;
match.fill_into_edit = shortcut.match_core.fill_into_edit;
match.destination_url = shortcut.match_core.destination_url;
@@ -208,6 +369,7 @@ AutocompleteMatch ShortcutsProvider::ShortcutToACMatch(
match.transition = ui::PageTransitionFromInt(shortcut.match_core.transition);
match.type = static_cast<AutocompleteMatch::Type>(shortcut.match_core.type);
match.keyword = shortcut.match_core.keyword;
+ match.stripped_destination_url = shortcut_match.stripped_destination_url;
Peter Kasting 2016/04/14 23:52:33 Does this matter? It seems likely that Autocomple
Alexander Yashkin 2016/04/15 09:14:14 Done.
match.RecordAdditionalInfo("number of hits", shortcut.number_of_hits);
match.RecordAdditionalInfo("last access time", shortcut.last_access_time);
match.RecordAdditionalInfo("original input text",
@@ -247,13 +409,11 @@ AutocompleteMatch ShortcutsProvider::ShortcutToACMatch(
}
}
match.EnsureUWYTIsAllowedToBeDefault(input,
- client_->GetAcceptLanguages(),
+ languages_,
Peter Kasting 2016/04/14 23:52:33 Nit: Put on previous line
Alexander Yashkin 2016/04/15 09:14:14 Done.
client_->GetTemplateURLService());
// Try to mark pieces of the contents and description as matches if they
// appear in |input.text()|.
- const base::string16 term_string = base::i18n::ToLower(input.text());
- WordMap terms_map(CreateWordMapForString(term_string));
if (!terms_map.empty()) {
match.contents_class = ClassifyAllMatchesInString(
term_string, terms_map, match.contents, match.contents_class);
@@ -263,114 +423,6 @@ AutocompleteMatch ShortcutsProvider::ShortcutToACMatch(
return match;
}
-// static
-ShortcutsProvider::WordMap ShortcutsProvider::CreateWordMapForString(
- const base::string16& text) {
- // First, convert |text| to a vector of the unique words in it.
- WordMap word_map;
- base::i18n::BreakIterator word_iter(text,
- base::i18n::BreakIterator::BREAK_WORD);
- if (!word_iter.Init())
- return word_map;
- std::vector<base::string16> words;
- while (word_iter.Advance()) {
- if (word_iter.IsWord())
- words.push_back(word_iter.GetString());
- }
- if (words.empty())
- return word_map;
- std::sort(words.begin(), words.end());
- words.erase(std::unique(words.begin(), words.end()), words.end());
-
- // Now create a map from (first character) to (words beginning with that
- // character). We insert in reverse lexicographical order and rely on the
- // multimap preserving insertion order for values with the same key. (This
- // is mandated in C++11, and part of that decision was based on a survey of
- // existing implementations that found that it was already true everywhere.)
- std::reverse(words.begin(), words.end());
- for (std::vector<base::string16>::const_iterator i(words.begin());
- i != words.end(); ++i)
- word_map.insert(std::make_pair((*i)[0], *i));
- return word_map;
-}
-
-// static
-ACMatchClassifications ShortcutsProvider::ClassifyAllMatchesInString(
- const base::string16& find_text,
- const WordMap& find_words,
- const base::string16& text,
- const ACMatchClassifications& original_class) {
- DCHECK(!find_text.empty());
- DCHECK(!find_words.empty());
-
- // The code below assumes |text| is nonempty and therefore the resulting
- // classification vector should always be nonempty as well. Returning early
- // if |text| is empty assures we'll return the (correct) empty vector rather
- // than a vector with a single (0, NONE) match.
- if (text.empty())
- return original_class;
-
- // First check whether |text| begins with |find_text| and mark that whole
- // section as a match if so.
- base::string16 text_lowercase(base::i18n::ToLower(text));
- ACMatchClassifications match_class;
- size_t last_position = 0;
- if (base::StartsWith(text_lowercase, find_text,
- base::CompareCase::SENSITIVE)) {
- match_class.push_back(
- ACMatchClassification(0, ACMatchClassification::MATCH));
- last_position = find_text.length();
- // If |text_lowercase| is actually equal to |find_text|, we don't need to
- // (and in fact shouldn't) put a trailing NONE classification after the end
- // of the string.
- if (last_position < text_lowercase.length()) {
- match_class.push_back(
- ACMatchClassification(last_position, ACMatchClassification::NONE));
- }
- } else {
- // |match_class| should start at position 0. If the first matching word is
- // found at position 0, this will be popped from the vector further down.
- match_class.push_back(
- ACMatchClassification(0, ACMatchClassification::NONE));
- }
-
- // Now, starting with |last_position|, check each character in
- // |text_lowercase| to see if we have words starting with that character in
- // |find_words|. If so, check each of them to see if they match the portion
- // of |text_lowercase| beginning with |last_position|. Accept the first
- // matching word found (which should be the longest possible match at this
- // location, given the construction of |find_words|) and add a MATCH region to
- // |match_class|, moving |last_position| to be after the matching word. If we
- // found no matching words, move to the next character and repeat.
- while (last_position < text_lowercase.length()) {
- std::pair<WordMap::const_iterator, WordMap::const_iterator> range(
- find_words.equal_range(text_lowercase[last_position]));
- size_t next_character = last_position + 1;
- for (WordMap::const_iterator i(range.first); i != range.second; ++i) {
- const base::string16& word = i->second;
- size_t word_end = last_position + word.length();
- if ((word_end <= text_lowercase.length()) &&
- !text_lowercase.compare(last_position, word.length(), word)) {
- // Collapse adjacent ranges into one.
- if (match_class.back().offset == last_position)
- match_class.pop_back();
-
- AutocompleteMatch::AddLastClassificationIfNecessary(
- &match_class, last_position, ACMatchClassification::MATCH);
- if (word_end < text_lowercase.length()) {
- match_class.push_back(
- ACMatchClassification(word_end, ACMatchClassification::NONE));
- }
- last_position = word_end;
- break;
- }
- }
- last_position = std::max(last_position, next_character);
- }
-
- return AutocompleteMatch::MergeClassifications(original_class, match_class);
-}
-
ShortcutsBackend::ShortcutMap::const_iterator ShortcutsProvider::FindFirstMatch(
const base::string16& keyword,
ShortcutsBackend* backend) {

Powered by Google App Engine
This is Rietveld 408576698