components/omnibox/browser/url_index_private_data.cc - Issue 2187343002: Generating autocomplete results with and without word breaks in the Omnibox.

Unified Diff: components/omnibox/browser/url_index_private_data.cc

Issue 2187343002: Generating autocomplete results with and without word breaks in the Omnibox. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Code review fixes for patch entitled "Generating autocomplete results with and without word breaks … Created 4 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: components/omnibox/browser/url_index_private_data.cc

diff --git a/components/omnibox/browser/url_index_private_data.cc b/components/omnibox/browser/url_index_private_data.cc

index 54123a335ca874ae527c5babe079cf242c08fc5e..44f7d292ddbf6a6f5192368d0fbaddf4a64174bd 100644

--- a/components/omnibox/browser/url_index_private_data.cc

+++ b/components/omnibox/browser/url_index_private_data.cc

@@ -152,79 +152,48 @@ URLIndexPrivateData::URLIndexPrivateData()

post_scoring_item_count_(0) {

}

-ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms(

- base::string16 search_string,

- size_t cursor_position,

- size_t max_matches,

- bookmarks::BookmarkModel* bookmark_model,

- TemplateURLService* template_url_service) {

- // If cursor position is set and useful (not at either end of the

- // string), allow the search string to be broken at cursor position.

- // We do this by pretending there's a space where the cursor is.

- if ((cursor_position != base::string16::npos) &&

- (cursor_position < search_string.length()) &&

- (cursor_position > 0)) {

- search_string.insert(cursor_position, base::ASCIIToUTF16(" "));

- }

- pre_filter_item_count_ = 0;

- post_filter_item_count_ = 0;

- post_scoring_item_count_ = 0;

+HistoryIDSet URLIndexPrivateData::HistoryItemsForWords(

+ base::string16 search_string) {

+ HistoryIDSet history_id_set;

Mark P 2016/08/10 17:46:19 This variable isn't needed; simply return HistoryI

Lavar Askew 2016/08/18 03:07:51 Done.

// The search string we receive may contain escaped characters. For reducing

// the index we need individual, lower-cased words, ignoring escapings. For

// the final filtering we need whitespace separated substrings possibly

// containing escaped characters.

- base::string16 lower_raw_string(base::i18n::ToLower(search_string));

+ base::string16 lower_raw_string(

+ base::i18n::ToLower(search_string));

base::string16 lower_unescaped_string =

- net::UnescapeURLComponent(lower_raw_string,

- net::UnescapeRule::SPACES | net::UnescapeRule::PATH_SEPARATORS |

- net::UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS);

+ net::UnescapeURLComponent(lower_raw_string,

+ net::UnescapeRule::SPACES | net::UnescapeRule::PATH_SEPARATORS |

+ net::UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS);

// Extract individual 'words' (as opposed to 'terms'; see below) from the

// search string. When the user types "colspec=ID%20Mstone Release" we get

// four 'words': "colspec", "id", "mstone" and "release".

String16Vector lower_words(

- String16VectorFromString16(lower_unescaped_string, false, nullptr));

- ScoredHistoryMatches scored_items;

+ String16VectorFromString16(

+ lower_unescaped_string, false, nullptr));

// Do nothing if we have indexed no words (probably because we've not been

// initialized yet) or the search string has no words.

if (word_list_.empty() || lower_words.empty()) {

search_term_cache_.clear(); // Invalidate the term cache.

- return scored_items;

+ return history_id_set;

}

- // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep

- // approach.

- ResetSearchTermCache();

+ history_id_set =

+ HistoryIDSetFromWords(lower_words);

- HistoryIDSet history_id_set = HistoryIDSetFromWords(lower_words);

+ return history_id_set;

- // Trim the candidate pool if it is large. Note that we do not filter out

- // items that do not contain the search terms as proper substrings -- doing

- // so is the performance-costly operation we are trying to avoid in order

- // to maintain omnibox responsiveness.

- const size_t kItemsToScoreLimit = 500;

- pre_filter_item_count_ = history_id_set.size();

- // If we trim the results set we do not want to cache the results for next

- // time as the user's ultimately desired result could easily be eliminated

- // in this early rough filter.

- bool was_trimmed = (pre_filter_item_count_ > kItemsToScoreLimit);

- if (was_trimmed) {

- HistoryIDVector history_ids;

- std::copy(history_id_set.begin(), history_id_set.end(),

- std::back_inserter(history_ids));

- // Trim down the set by sorting by typed-count, visit-count, and last

- // visit.

- HistoryItemFactorGreater

- item_factor_functor(history_info_map_);

- std::partial_sort(history_ids.begin(),

- history_ids.begin() + kItemsToScoreLimit,

- history_ids.end(),

- item_factor_functor);

- history_id_set.clear();

- std::copy(history_ids.begin(), history_ids.begin() + kItemsToScoreLimit,

- std::inserter(history_id_set, history_id_set.end()));

- post_filter_item_count_ = history_id_set.size();

- }

+ScoredHistoryMatches URLIndexPrivateData::GetScoredItemsForSearchString (

+ base::string16 search_string,

+ HistoryIDSet history_id_set,

+ size_t max_matches,

+ bookmarks::BookmarkModel* bookmark_model,

+ TemplateURLService* template_url_service) {

+ ScoredHistoryMatches scored_items;

// Pass over all of the candidates filtering out any without a proper

// substring match, inserting those which pass in order by score. Note that

@@ -237,9 +206,12 @@ ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms(

// we only want to break up the search string on 'true' whitespace rather than

// escaped whitespace. When the user types "colspec=ID%20Mstone Release" we

// get two 'terms': "colspec=id%20mstone" and "release".

+ base::string16 lower_raw_string(

+ base::i18n::ToLower(search_string));

String16Vector lower_raw_terms = base::SplitString(

lower_raw_string, base::kWhitespaceUTF16, base::KEEP_WHITESPACE,

base::SPLIT_WANT_NONEMPTY);

if (lower_raw_terms.empty()) {

// Don't score matches when there are no terms to score against. (It's

// possible that the word break iterator that extracts words to search

@@ -264,27 +236,154 @@ ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms(

max_matches,

scored_items.end(),

ScoredHistoryMatch::MatchScoreGreater);

- scored_items.resize(max_matches);

- } else {

+ scored_items.resize(max_matches);

+ }

+ else {

std::sort(scored_items.begin(), scored_items.end(),

ScoredHistoryMatch::MatchScoreGreater);

}

- post_scoring_item_count_ = scored_items.size();

+ return scored_items;

+bool URLIndexPrivateData::TrimCandidatePool (HistoryIDSet history_id_set) {

+ // Trim the candidate pool if it is large. Note that we do not filter out

Mark P 2016/08/10 17:46:19 I assume this whole block was copied with no modif

Lavar Askew 2016/08/18 03:07:51 Yes, you are correct. Done.

+ // items that do not contain the search terms as proper substrings -- doing

+ // so is the performance-costly operation we are trying to avoid in order

+ // to maintain omnibox responsiveness.

+ const size_t kItemsToScoreLimit = 500;

+ pre_filter_item_count_ = history_id_set.size();

+ // If we trim the results set we do not want to cache the results for next

+ // time as the user's ultimately desired result could easily be eliminated

+ // in this early rough filter.

+ bool was_trimmed = (pre_filter_item_count_ > kItemsToScoreLimit);

if (was_trimmed) {

+ HistoryIDVector history_ids;

+ std::copy(history_id_set.begin(), history_id_set.end(),

+ std::back_inserter(history_ids));

+ // Trim down the set by sorting by typed-count, visit-count, and last

+ // visit.

+ HistoryItemFactorGreater

+ item_factor_functor(history_info_map_);

+ std::partial_sort(history_ids.begin(),

+ history_ids.begin() + kItemsToScoreLimit,

+ history_ids.end(),

+ item_factor_functor);

+ history_id_set.clear();

+ std::copy(history_ids.begin(), history_ids.begin() + kItemsToScoreLimit,

+ std::inserter(history_id_set, history_id_set.end()));

+ post_filter_item_count_ = history_id_set.size();

+ }

+ return was_trimmed;

+ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms(

+ base::string16 search_string,

+ size_t cursor_position,

+ size_t max_matches,

+ bookmarks::BookmarkModel* bookmark_model,

+ TemplateURLService* template_url_service) {

+ pre_filter_item_count_ = 0;

+ post_filter_item_count_ = 0;

+ post_scoring_item_count_ = 0;

+ // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep

+ // approach.

+ ResetSearchTermCache();

+ HistoryIDSet history_id_set = HistoryItemsForWords(search_string);

+ bool history_id_set_without_word_break_was_trimmed =

+ TrimCandidatePool(history_id_set);

+ ScoredHistoryMatches scored_items_without_word_break =

+ GetScoredItemsForSearchString(

+ search_string,

+ history_id_set,

+ max_matches,

+ bookmark_model,

+ template_url_service);

+ ScoredHistoryMatches all_scored_items;

+ ScoredHistoryMatches scored_items_with_word_break;

+ bool history_id_set_with_word_break_was_trimmed = false;

+ // If cursor position is set and useful (not at either end of the

+ // string), allow the search string to be broken at cursor position.

+ // We do this by pretending there's a space where the cursor is.

+ if ((cursor_position != base::string16::npos) &&

+ (cursor_position < search_string.length()) &&

+ (cursor_position > 0)) {

+ base::string16 search_string_with_word_break = search_string;

+ HistoryIDSet history_id_set_with_word_break;

+ base::string16 word_break =

+ base::ASCIIToUTF16(" ");

+ search_string_with_word_break.insert(cursor_position, word_break);

+ // Add to history_id_set the ids that are related to the original

+ // search string, but with the word break.

+ history_id_set_with_word_break =

+ HistoryItemsForWords(search_string_with_word_break);

Mark P 2016/08/10 17:46:19 This might do unnecessary work if the new search s

+ history_id_set_with_word_break_was_trimmed =

+ TrimCandidatePool (history_id_set_with_word_break);

+ scored_items_with_word_break = GetScoredItemsForSearchString(

+ search_string_with_word_break,

+ history_id_set_with_word_break,

+ max_matches,

+ bookmark_model,

+ template_url_service);

+ }

+ if (history_id_set_without_word_break_was_trimmed ||

+ history_id_set_with_word_break_was_trimmed) {

search_term_cache_.clear(); // Invalidate the term cache.

- } else {

+ }

+ else {

// Remove any stale SearchTermCacheItems.

for (SearchTermCacheMap::iterator cache_iter = search_term_cache_.begin();

cache_iter != search_term_cache_.end(); ) {

- if (!cache_iter->second.used_)

+ if (!cache_iter->second.used_) {

search_term_cache_.erase(cache_iter++);

- else

+ }

+ else {

++cache_iter;

+ }

}

- return scored_items;

+ // all_scored_items represents the unification of the

+ // ScoredHistoryMatches for the search string with and without the word break.

Mark P 2016/08/10 17:46:19 The code below can be made more efficient and easi

Lavar Askew 2016/08/18 03:07:51 Done.

+ if (scored_items_without_word_break.size() > 0 &&

+ scored_items_with_word_break.size() > 0) {

+ post_scoring_item_count_ =

+ scored_items_without_word_break.size()

+ + scored_items_with_word_break.size();

+ all_scored_items.reserve(post_scoring_item_count_);

+ all_scored_items.insert(all_scored_items.end(),

+ scored_items_without_word_break.begin(),

+ scored_items_without_word_break.end());

+ all_scored_items.insert(all_scored_items.end(),

+ scored_items_with_word_break.begin(),

+ scored_items_with_word_break.end());

+ }

+ else if (scored_items_without_word_break.size() == 0 &&

+ scored_items_with_word_break.size() > 0) {

+ all_scored_items = scored_items_with_word_break;

+ }

+ else if (scored_items_without_word_break.size() > 0 &&

+ scored_items_with_word_break.size() == 0) {

+ all_scored_items = scored_items_without_word_break;

+ }

Mark P 2016/08/10 17:46:19 I could imagine situations where the items overlap

Lavar Askew 2016/08/18 03:07:51 Done.

+ return all_scored_items;

Mark P 2016/08/10 17:46:19 Also, the number of items we're supposed to return

Lavar Askew 2016/08/18 03:07:52 Done.

}

bool URLIndexPrivateData::UpdateURL(

« components/omnibox/browser/url_index_private_data.h ('K') | « components/omnibox/browser/url_index_private_data.h ('k') | no next file » | no next file with comments »