components/omnibox/browser/shortcuts_provider.cc - Issue 1877833002: Optimize shortcuts provider

Unified Diff: components/omnibox/browser/shortcuts_provider.cc

Issue 1877833002: Optimize shortcuts provider (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Fixes after review, round 2 Created 4 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« components/omnibox/browser/shortcuts_provider.h ('K') | « components/omnibox/browser/shortcuts_provider.h ('k') | components/omnibox/browser/shortcuts_provider_unittest.cc » ('j') | components/omnibox/browser/shortcuts_provider_unittest.cc » ('J')
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: components/omnibox/browser/shortcuts_provider.cc

diff --git a/components/omnibox/browser/shortcuts_provider.cc b/components/omnibox/browser/shortcuts_provider.cc

index 22ee0c71543b97dc00a86e9acc4582fa098805e2..a4ab5f7f40eeea70a7942b3265ce6775cbf0262a 100644

--- a/components/omnibox/browser/shortcuts_provider.cc

+++ b/components/omnibox/browser/shortcuts_provider.cc

@@ -28,6 +28,7 @@

#include "components/omnibox/browser/autocomplete_provider_client.h"

#include "components/omnibox/browser/autocomplete_result.h"

#include "components/omnibox/browser/history_provider.h"

+#include "components/omnibox/browser/match_compare.h"

#include "components/omnibox/browser/omnibox_field_trial.h"

#include "components/omnibox/browser/url_prefix.h"

#include "components/prefs/pref_service.h"

@@ -51,6 +52,15 @@ class DestinationURLEqualsURL {

const int ShortcutsProvider::kShortcutsProviderDefaultMaxRelevance = 1199;

+ShortcutsProvider::ShortcutMatch::ShortcutMatch(

+ int relevance, const GURL& stripped_destination_url,

+ const ShortcutsDatabase::Shortcut* shortcut) :

Peter Kasting 2016/04/14 23:52:33 Nit: ':' goes at the beginning of the next line, n

Alexander Yashkin 2016/04/15 09:14:14 Done.

+ relevance(relevance),

Peter Kasting 2016/04/14 23:52:33 Nit: These lines are not sufficiently indented

Alexander Yashkin 2016/04/15 09:14:15 Done.

+ stripped_destination_url(stripped_destination_url),

+ shortcut(shortcut),

+ contents(shortcut->match_core.contents),

+ type(static_cast<AutocompleteMatch::Type>(shortcut->match_core.type)) {}

ShortcutsProvider::ShortcutsProvider(AutocompleteProviderClient* client)

: AutocompleteProvider(AutocompleteProvider::TYPE_SHORTCUTS),

client_(client),

@@ -118,6 +128,134 @@ ShortcutsProvider::~ShortcutsProvider() {

backend->RemoveObserver(this);

}

+// static

Peter Kasting 2016/04/14 23:52:33 Nit: Might want to move these functions up in a se

Alexander Yashkin 2016/04/15 09:14:14 Reverted move.

+ShortcutsProvider::WordMap ShortcutsProvider::CreateWordMapForString(

+ const base::string16& text) {

+ // First, convert |text| to a vector of the unique words in it.

+ WordMap word_map;

+ base::i18n::BreakIterator word_iter(text,

+ base::i18n::BreakIterator::BREAK_WORD);

+ if (!word_iter.Init())

+ return word_map;

+ std::vector<base::string16> words;

+ while (word_iter.Advance()) {

+ if (word_iter.IsWord())

+ words.push_back(word_iter.GetString());

+ }

+ if (words.empty())

+ return word_map;

+ std::sort(words.begin(), words.end());

+ words.erase(std::unique(words.begin(), words.end()), words.end());

+ // Now create a map from (first character) to (words beginning with that

+ // character). We insert in reverse lexicographical order and rely on the

+ // multimap preserving insertion order for values with the same key. (This

+ // is mandated in C++11, and part of that decision was based on a survey of

+ // existing implementations that found that it was already true everywhere.)

+ std::reverse(words.begin(), words.end());

+ for (std::vector<base::string16>::const_iterator i(words.begin());

+ i != words.end(); ++i)

+ word_map.insert(std::make_pair((*i)[0], *i));

+ return word_map;

+// static

+ACMatchClassifications ShortcutsProvider::ClassifyAllMatchesInString(

+ const base::string16& find_text,

+ const WordMap& find_words,

+ const base::string16& text,

+ const ACMatchClassifications& original_class) {

+ DCHECK(!find_text.empty());

+ DCHECK(!find_words.empty());

+ // The code below assumes |text| is nonempty and therefore the resulting

+ // classification vector should always be nonempty as well. Returning early

+ // if |text| is empty assures we'll return the (correct) empty vector rather

+ // than a vector with a single (0, NONE) match.

+ if (text.empty())

+ return original_class;

+ // First check whether |text| begins with |find_text| and mark that whole

+ // section as a match if so.

+ base::string16 text_lowercase(base::i18n::ToLower(text));

+ ACMatchClassifications match_class;

+ size_t last_position = 0;

+ if (base::StartsWith(text_lowercase, find_text,

+ base::CompareCase::SENSITIVE)) {

+ match_class.push_back(

+ ACMatchClassification(0, ACMatchClassification::MATCH));

+ last_position = find_text.length();

+ // If |text_lowercase| is actually equal to |find_text|, we don't need to

+ // (and in fact shouldn't) put a trailing NONE classification after the end

+ // of the string.

+ if (last_position < text_lowercase.length()) {

+ match_class.push_back(

+ ACMatchClassification(last_position, ACMatchClassification::NONE));

+ }

+ } else {

+ // |match_class| should start at position 0. If the first matching word is

+ // found at position 0, this will be popped from the vector further down.

+ match_class.push_back(

+ ACMatchClassification(0, ACMatchClassification::NONE));

+ }

+ // Now, starting with |last_position|, check each character in

+ // |text_lowercase| to see if we have words starting with that character in

+ // |find_words|. If so, check each of them to see if they match the portion

+ // of |text_lowercase| beginning with |last_position|. Accept the first

+ // matching word found (which should be the longest possible match at this

+ // location, given the construction of |find_words|) and add a MATCH region to

+ // |match_class|, moving |last_position| to be after the matching word. If we

+ // found no matching words, move to the next character and repeat.

+ while (last_position < text_lowercase.length()) {

+ std::pair<WordMap::const_iterator, WordMap::const_iterator> range(

+ find_words.equal_range(text_lowercase[last_position]));

+ size_t next_character = last_position + 1;

+ for (WordMap::const_iterator i(range.first); i != range.second; ++i) {

+ const base::string16& word = i->second;

+ size_t word_end = last_position + word.length();

+ if ((word_end <= text_lowercase.length()) &&

+ !text_lowercase.compare(last_position, word.length(), word)) {

+ // Collapse adjacent ranges into one.

+ if (match_class.back().offset == last_position)

+ match_class.pop_back();

+ AutocompleteMatch::AddLastClassificationIfNecessary(

+ &match_class, last_position, ACMatchClassification::MATCH);

+ if (word_end < text_lowercase.length()) {

+ match_class.push_back(

+ ACMatchClassification(word_end, ACMatchClassification::NONE));

+ }

+ last_position = word_end;

+ break;

+ }

+ last_position = std::max(last_position, next_character);

+ }

+ return AutocompleteMatch::MergeClassifications(original_class, match_class);

+// static

+void ShortcutsProvider::SortAndDedupMatches(

+ OmniboxEventProto::PageClassification page_classification,

+ std::vector<ShortcutMatch>* matches) {

+ // Sort matches such that duplicate matches are consecutive.

+ std::sort(matches->begin(), matches->end(),

+ DestinationSort<ShortcutMatch>(page_classification));

+ // Erase duplicate matches. Duplicate matches are those with

+ // stripped_destination_url fields equal and non empty.

+ matches->erase(

+ std::unique(matches->begin(), matches->end(),

+ [](const ShortcutMatch& elem1, const ShortcutMatch& elem2) {

+ return !elem1.stripped_destination_url.is_empty() &&

+ (elem1.stripped_destination_url ==

+ elem2.stripped_destination_url);

+ }),

+ matches->end());

void ShortcutsProvider::OnShortcutsLoaded() {

initialized_ = true;

}

@@ -138,6 +276,8 @@ void ShortcutsProvider::GetMatches(const AutocompleteInput& input) {

max_relevance = kShortcutsProviderDefaultMaxRelevance;

TemplateURLService* template_url_service = client_->GetTemplateURLService();

const base::string16 fixed_up_input(FixupUserInput(input).second);

+ std::vector<ShortcutMatch> shortcut_matches;

for (ShortcutsBackend::ShortcutMap::const_iterator it =

FindFirstMatch(term_string, backend.get());

it != backend->shortcuts_map().end() &&

@@ -147,16 +287,21 @@ void ShortcutsProvider::GetMatches(const AutocompleteInput& input) {

// Don't return shortcuts with zero relevance.

int relevance = CalculateScore(term_string, it->second, max_relevance);

if (relevance) {

- matches_.push_back(

- ShortcutToACMatch(it->second, relevance, input, fixed_up_input));

- matches_.back().ComputeStrippedDestinationURL(

- input, client_->GetAcceptLanguages(), template_url_service);

+ const ShortcutsDatabase::Shortcut& shortcut = it->second;

+ GURL stripped_destination_url(AutocompleteMatch::GURLToStrippedGURL(

+ shortcut.match_core.destination_url,

+ input,

+ languages_,

+ template_url_service,

+ shortcut.match_core.keyword));

+ shortcut_matches.push_back(

+ ShortcutMatch(relevance, stripped_destination_url, &(it->second)));

Peter Kasting 2016/04/14 23:52:33 Nit: Parens around -> operator not necessary

Alexander Yashkin 2016/04/15 09:14:14 Done.

}

// Remove duplicates. This is important because it's common to have multiple

// shortcuts pointing to the same URL, e.g., ma, mai, and mail all pointing

// to mail.google.com, so typing "m" will return them all. If we then simply

- // clamp to kMaxMatches and let the AutocompleteResult take care of

+ // clamp to kMaxMatches and let the SortAndDedupMatches take care of

// collapsing the duplicates, we'll effectively only be returning one match,

// instead of several possibilities.

@@ -164,37 +309,53 @@ void ShortcutsProvider::GetMatches(const AutocompleteInput& input) {

// |duplicate_matches| field--duplicates don't need to be preserved in the

// matches because they are only used for deletions, and this provider

// deletes matches based on the URL.

- AutocompleteResult::DedupMatchesByDestination(

- input.current_page_classification(), false, &matches_);

+ SortAndDedupMatches(input.current_page_classification(), &shortcut_matches);

// Find best matches.

std::partial_sort(

- matches_.begin(),

- matches_.begin() +

- std::min(AutocompleteProvider::kMaxMatches, matches_.size()),

- matches_.end(), &AutocompleteMatch::MoreRelevant);

- if (matches_.size() > AutocompleteProvider::kMaxMatches) {

- matches_.erase(matches_.begin() + AutocompleteProvider::kMaxMatches,

- matches_.end());

+ shortcut_matches.begin(),

+ shortcut_matches.begin() +

+ std::min(AutocompleteProvider::kMaxMatches, shortcut_matches.size()),

+ shortcut_matches.end(),

+ [](const ShortcutMatch& elem1, const ShortcutMatch& elem2) {

+ // For equal-relevance matches, we sort alphabetically, so that

+ // providers who return multiple elements at the same priority get a

+ // "stable" sort across multiple updates.

Peter Kasting 2016/04/14 23:52:33 Nit: This comment is a bit odd since this code is

Alexander Yashkin 2016/04/15 09:14:14 Done.

+ return elem1.relevance == elem2.relevance ?

+ elem1.contents < elem2.contents :

+ elem1.relevance > elem2.relevance;

+ });

+ if (shortcut_matches.size() > AutocompleteProvider::kMaxMatches) {

+ shortcut_matches.erase(

+ shortcut_matches.begin() + AutocompleteProvider::kMaxMatches,

+ shortcut_matches.end());

}

- // Guarantee that all scores are decreasing (but do not assign any scores

- // below 1).

- for (ACMatches::iterator it = matches_.begin(); it != matches_.end(); ++it) {

- max_relevance = std::min(max_relevance, it->relevance);

- it->relevance = max_relevance;

+ // Create and initialize autocomplete matches from shortcut matches.

Peter Kasting 2016/04/14 23:52:33 Nit: Wrap all comment lines as close to 80 columns

Alexander Yashkin 2016/04/15 09:14:14 Done.

+ // Also guarantee that all relevance scores are decreasing

+ // (but do not assign any scores below 1).

+ WordMap terms_map(CreateWordMapForString(term_string));

+ matches_.reserve(shortcut_matches.size());

+ for (ShortcutMatch& match : shortcut_matches) {

+ max_relevance = std::min(max_relevance, match.relevance);

+ match.relevance = max_relevance;

Peter Kasting 2016/04/14 23:52:33 Rather than update match.relevance, it would be be

Alexander Yashkin 2016/04/15 09:14:14 Done.

if (max_relevance > 1)

--max_relevance;

+ matches_.push_back(ShortcutMatchToACMatch(match, input, fixed_up_input,

+ term_string, terms_map));

}

-AutocompleteMatch ShortcutsProvider::ShortcutToACMatch(

- const ShortcutsDatabase::Shortcut& shortcut,

- int relevance,

+AutocompleteMatch ShortcutsProvider::ShortcutMatchToACMatch(

+ const ShortcutMatch& shortcut_match,

const AutocompleteInput& input,

- const base::string16& fixed_up_input_text) {

+ const base::string16& fixed_up_input_text,

+ const base::string16 term_string,

+ const WordMap& terms_map) {

DCHECK(!input.text().empty());

+ const ShortcutsDatabase::Shortcut& shortcut = *shortcut_match.shortcut;

AutocompleteMatch match;

match.provider = this;

- match.relevance = relevance;

+ match.relevance = shortcut_match.relevance;

match.deletable = true;

match.fill_into_edit = shortcut.match_core.fill_into_edit;

match.destination_url = shortcut.match_core.destination_url;

@@ -208,6 +369,7 @@ AutocompleteMatch ShortcutsProvider::ShortcutToACMatch(

match.transition = ui::PageTransitionFromInt(shortcut.match_core.transition);

match.type = static_cast<AutocompleteMatch::Type>(shortcut.match_core.type);

match.keyword = shortcut.match_core.keyword;

+ match.stripped_destination_url = shortcut_match.stripped_destination_url;

Peter Kasting 2016/04/14 23:52:33 Does this matter? It seems likely that Autocomple

Alexander Yashkin 2016/04/15 09:14:14 Done.

match.RecordAdditionalInfo("number of hits", shortcut.number_of_hits);

match.RecordAdditionalInfo("last access time", shortcut.last_access_time);

match.RecordAdditionalInfo("original input text",

@@ -247,13 +409,11 @@ AutocompleteMatch ShortcutsProvider::ShortcutToACMatch(

}

match.EnsureUWYTIsAllowedToBeDefault(input,

- client_->GetAcceptLanguages(),

+ languages_,

Peter Kasting 2016/04/14 23:52:33 Nit: Put on previous line

Alexander Yashkin 2016/04/15 09:14:14 Done.

client_->GetTemplateURLService());

// Try to mark pieces of the contents and description as matches if they

// appear in |input.text()|.

- const base::string16 term_string = base::i18n::ToLower(input.text());

- WordMap terms_map(CreateWordMapForString(term_string));

if (!terms_map.empty()) {

match.contents_class = ClassifyAllMatchesInString(

term_string, terms_map, match.contents, match.contents_class);

@@ -263,114 +423,6 @@ AutocompleteMatch ShortcutsProvider::ShortcutToACMatch(

return match;

}

-// static

-ShortcutsProvider::WordMap ShortcutsProvider::CreateWordMapForString(

- const base::string16& text) {

- // First, convert |text| to a vector of the unique words in it.

- WordMap word_map;

- base::i18n::BreakIterator word_iter(text,

- base::i18n::BreakIterator::BREAK_WORD);

- if (!word_iter.Init())

- return word_map;

- std::vector<base::string16> words;

- while (word_iter.Advance()) {

- if (word_iter.IsWord())

- words.push_back(word_iter.GetString());

- }

- if (words.empty())

- return word_map;

- std::sort(words.begin(), words.end());

- words.erase(std::unique(words.begin(), words.end()), words.end());

- // Now create a map from (first character) to (words beginning with that

- // character). We insert in reverse lexicographical order and rely on the

- // multimap preserving insertion order for values with the same key. (This

- // is mandated in C++11, and part of that decision was based on a survey of

- // existing implementations that found that it was already true everywhere.)

- std::reverse(words.begin(), words.end());

- for (std::vector<base::string16>::const_iterator i(words.begin());

- i != words.end(); ++i)

- word_map.insert(std::make_pair((*i)[0], *i));

- return word_map;

-// static

-ACMatchClassifications ShortcutsProvider::ClassifyAllMatchesInString(

- const base::string16& find_text,

- const WordMap& find_words,

- const base::string16& text,

- const ACMatchClassifications& original_class) {

- DCHECK(!find_text.empty());

- DCHECK(!find_words.empty());

- // The code below assumes |text| is nonempty and therefore the resulting

- // classification vector should always be nonempty as well. Returning early

- // if |text| is empty assures we'll return the (correct) empty vector rather

- // than a vector with a single (0, NONE) match.

- if (text.empty())

- return original_class;

- // First check whether |text| begins with |find_text| and mark that whole

- // section as a match if so.

- base::string16 text_lowercase(base::i18n::ToLower(text));

- ACMatchClassifications match_class;

- size_t last_position = 0;

- if (base::StartsWith(text_lowercase, find_text,

- base::CompareCase::SENSITIVE)) {

- match_class.push_back(

- ACMatchClassification(0, ACMatchClassification::MATCH));

- last_position = find_text.length();

- // If |text_lowercase| is actually equal to |find_text|, we don't need to

- // (and in fact shouldn't) put a trailing NONE classification after the end

- // of the string.

- if (last_position < text_lowercase.length()) {

- match_class.push_back(

- ACMatchClassification(last_position, ACMatchClassification::NONE));

- }

- } else {

- // |match_class| should start at position 0. If the first matching word is

- // found at position 0, this will be popped from the vector further down.

- match_class.push_back(

- ACMatchClassification(0, ACMatchClassification::NONE));

- }

- // Now, starting with |last_position|, check each character in

- // |text_lowercase| to see if we have words starting with that character in

- // |find_words|. If so, check each of them to see if they match the portion

- // of |text_lowercase| beginning with |last_position|. Accept the first

- // matching word found (which should be the longest possible match at this

- // location, given the construction of |find_words|) and add a MATCH region to

- // |match_class|, moving |last_position| to be after the matching word. If we

- // found no matching words, move to the next character and repeat.

- while (last_position < text_lowercase.length()) {

- std::pair<WordMap::const_iterator, WordMap::const_iterator> range(

- find_words.equal_range(text_lowercase[last_position]));

- size_t next_character = last_position + 1;

- for (WordMap::const_iterator i(range.first); i != range.second; ++i) {

- const base::string16& word = i->second;

- size_t word_end = last_position + word.length();

- if ((word_end <= text_lowercase.length()) &&

- !text_lowercase.compare(last_position, word.length(), word)) {

- // Collapse adjacent ranges into one.

- if (match_class.back().offset == last_position)

- match_class.pop_back();

- AutocompleteMatch::AddLastClassificationIfNecessary(

- &match_class, last_position, ACMatchClassification::MATCH);

- if (word_end < text_lowercase.length()) {

- match_class.push_back(

- ACMatchClassification(word_end, ACMatchClassification::NONE));

- }

- last_position = word_end;

- break;

- }

- last_position = std::max(last_position, next_character);

- }

- return AutocompleteMatch::MergeClassifications(original_class, match_class);

ShortcutsBackend::ShortcutMap::const_iterator ShortcutsProvider::FindFirstMatch(

const base::string16& keyword,

ShortcutsBackend* backend) {