Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(233)

Unified Diff: components/omnibox/browser/url_index_private_data.cc

Issue 2187343002: Generating autocomplete results with and without word breaks in the Omnibox. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Code review fixes for patch entitled "Generating autocomplete results with and without word breaks in the Omnibox." Created 4 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: components/omnibox/browser/url_index_private_data.cc
diff --git a/components/omnibox/browser/url_index_private_data.cc b/components/omnibox/browser/url_index_private_data.cc
index 54123a335ca874ae527c5babe079cf242c08fc5e..44f7d292ddbf6a6f5192368d0fbaddf4a64174bd 100644
--- a/components/omnibox/browser/url_index_private_data.cc
+++ b/components/omnibox/browser/url_index_private_data.cc
@@ -152,79 +152,48 @@ URLIndexPrivateData::URLIndexPrivateData()
post_scoring_item_count_(0) {
}
-ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms(
- base::string16 search_string,
- size_t cursor_position,
- size_t max_matches,
- bookmarks::BookmarkModel* bookmark_model,
- TemplateURLService* template_url_service) {
- // If cursor position is set and useful (not at either end of the
- // string), allow the search string to be broken at cursor position.
- // We do this by pretending there's a space where the cursor is.
- if ((cursor_position != base::string16::npos) &&
- (cursor_position < search_string.length()) &&
- (cursor_position > 0)) {
- search_string.insert(cursor_position, base::ASCIIToUTF16(" "));
- }
- pre_filter_item_count_ = 0;
- post_filter_item_count_ = 0;
- post_scoring_item_count_ = 0;
+HistoryIDSet URLIndexPrivateData::HistoryItemsForWords(
+ base::string16 search_string) {
+
+ HistoryIDSet history_id_set;
Mark P 2016/08/10 17:46:19 This variable isn't needed; simply return HistoryI
Lavar Askew 2016/08/18 03:07:51 Done.
+
// The search string we receive may contain escaped characters. For reducing
// the index we need individual, lower-cased words, ignoring escapings. For
// the final filtering we need whitespace separated substrings possibly
// containing escaped characters.
- base::string16 lower_raw_string(base::i18n::ToLower(search_string));
+ base::string16 lower_raw_string(
+ base::i18n::ToLower(search_string));
base::string16 lower_unescaped_string =
- net::UnescapeURLComponent(lower_raw_string,
- net::UnescapeRule::SPACES | net::UnescapeRule::PATH_SEPARATORS |
- net::UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS);
+ net::UnescapeURLComponent(lower_raw_string,
+ net::UnescapeRule::SPACES | net::UnescapeRule::PATH_SEPARATORS |
+ net::UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS);
// Extract individual 'words' (as opposed to 'terms'; see below) from the
// search string. When the user types "colspec=ID%20Mstone Release" we get
// four 'words': "colspec", "id", "mstone" and "release".
String16Vector lower_words(
- String16VectorFromString16(lower_unescaped_string, false, nullptr));
- ScoredHistoryMatches scored_items;
+ String16VectorFromString16(
+ lower_unescaped_string, false, nullptr));
// Do nothing if we have indexed no words (probably because we've not been
// initialized yet) or the search string has no words.
if (word_list_.empty() || lower_words.empty()) {
search_term_cache_.clear(); // Invalidate the term cache.
- return scored_items;
+ return history_id_set;
}
- // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep
- // approach.
- ResetSearchTermCache();
+ history_id_set =
+ HistoryIDSetFromWords(lower_words);
- HistoryIDSet history_id_set = HistoryIDSetFromWords(lower_words);
+ return history_id_set;
+}
- // Trim the candidate pool if it is large. Note that we do not filter out
- // items that do not contain the search terms as proper substrings -- doing
- // so is the performance-costly operation we are trying to avoid in order
- // to maintain omnibox responsiveness.
- const size_t kItemsToScoreLimit = 500;
- pre_filter_item_count_ = history_id_set.size();
- // If we trim the results set we do not want to cache the results for next
- // time as the user's ultimately desired result could easily be eliminated
- // in this early rough filter.
- bool was_trimmed = (pre_filter_item_count_ > kItemsToScoreLimit);
- if (was_trimmed) {
- HistoryIDVector history_ids;
- std::copy(history_id_set.begin(), history_id_set.end(),
- std::back_inserter(history_ids));
- // Trim down the set by sorting by typed-count, visit-count, and last
- // visit.
- HistoryItemFactorGreater
- item_factor_functor(history_info_map_);
- std::partial_sort(history_ids.begin(),
- history_ids.begin() + kItemsToScoreLimit,
- history_ids.end(),
- item_factor_functor);
- history_id_set.clear();
- std::copy(history_ids.begin(), history_ids.begin() + kItemsToScoreLimit,
- std::inserter(history_id_set, history_id_set.end()));
- post_filter_item_count_ = history_id_set.size();
- }
+ScoredHistoryMatches URLIndexPrivateData::GetScoredItemsForSearchString (
+ base::string16 search_string,
+ HistoryIDSet history_id_set,
+ size_t max_matches,
+ bookmarks::BookmarkModel* bookmark_model,
+ TemplateURLService* template_url_service) {
+ ScoredHistoryMatches scored_items;
// Pass over all of the candidates filtering out any without a proper
// substring match, inserting those which pass in order by score. Note that
@@ -237,9 +206,12 @@ ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms(
// we only want to break up the search string on 'true' whitespace rather than
// escaped whitespace. When the user types "colspec=ID%20Mstone Release" we
// get two 'terms': "colspec=id%20mstone" and "release".
+ base::string16 lower_raw_string(
+ base::i18n::ToLower(search_string));
String16Vector lower_raw_terms = base::SplitString(
lower_raw_string, base::kWhitespaceUTF16, base::KEEP_WHITESPACE,
base::SPLIT_WANT_NONEMPTY);
+
if (lower_raw_terms.empty()) {
// Don't score matches when there are no terms to score against. (It's
// possible that the word break iterater that extracts words to search
@@ -264,27 +236,154 @@ ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms(
max_matches,
scored_items.end(),
ScoredHistoryMatch::MatchScoreGreater);
- scored_items.resize(max_matches);
- } else {
+ scored_items.resize(max_matches);
+ }
+ else {
std::sort(scored_items.begin(), scored_items.end(),
ScoredHistoryMatch::MatchScoreGreater);
}
- post_scoring_item_count_ = scored_items.size();
+ return scored_items;
+}
+
+
+bool URLIndexPrivateData::TrimCandidatePool (HistoryIDSet history_id_set) {
+
+ // Trim the candidate pool if it is large. Note that we do not filter out
Mark P 2016/08/10 17:46:19 I assume this whole block was copied with no modif
Lavar Askew 2016/08/18 03:07:51 Yes, you are correct. Done.
+ // items that do not contain the search terms as proper substrings -- doing
+ // so is the performance-costly operation we are trying to avoid in order
+ // to maintain omnibox responsiveness.
+ const size_t kItemsToScoreLimit = 500;
+ pre_filter_item_count_ = history_id_set.size();
+ // If we trim the results set we do not want to cache the results for next
+ // time as the user's ultimately desired result could easily be eliminated
+ // in this early rough filter.
+ bool was_trimmed = (pre_filter_item_count_ > kItemsToScoreLimit);
if (was_trimmed) {
+ HistoryIDVector history_ids;
+ std::copy(history_id_set.begin(), history_id_set.end(),
+ std::back_inserter(history_ids));
+ // Trim down the set by sorting by typed-count, visit-count, and last
+ // visit.
+ HistoryItemFactorGreater
+ item_factor_functor(history_info_map_);
+ std::partial_sort(history_ids.begin(),
+ history_ids.begin() + kItemsToScoreLimit,
+ history_ids.end(),
+ item_factor_functor);
+ history_id_set.clear();
+ std::copy(history_ids.begin(), history_ids.begin() + kItemsToScoreLimit,
+ std::inserter(history_id_set, history_id_set.end()));
+ post_filter_item_count_ = history_id_set.size();
+ }
+
+ return was_trimmed;
+}
+
+ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms(
+ base::string16 search_string,
+ size_t cursor_position,
+ size_t max_matches,
+ bookmarks::BookmarkModel* bookmark_model,
+ TemplateURLService* template_url_service) {
+ pre_filter_item_count_ = 0;
+ post_filter_item_count_ = 0;
+ post_scoring_item_count_ = 0;
+
+ // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep
+ // approach.
+ ResetSearchTermCache();
+
+ HistoryIDSet history_id_set = HistoryItemsForWords(search_string);
+
+ bool history_id_set_without_word_break_was_trimmed =
+ TrimCandidatePool(history_id_set);
+
+ ScoredHistoryMatches scored_items_without_word_break =
+ GetScoredItemsForSearchString(
+ search_string,
+ history_id_set,
+ max_matches,
+ bookmark_model,
+ template_url_service);
+
+ ScoredHistoryMatches all_scored_items;
+ ScoredHistoryMatches scored_items_with_word_break;
+
+ bool history_id_set_with_word_break_was_trimmed = false;
+
+ // If cursor position is set and useful (not at either end of the
+ // string), allow the search string to be broken at cursor position.
+ // We do this by pretending there's a space where the cursor is.
+ if ((cursor_position != base::string16::npos) &&
+ (cursor_position < search_string.length()) &&
+ (cursor_position > 0)) {
+ base::string16 search_string_with_word_break = search_string;
+ HistoryIDSet history_id_set_with_word_break;
+
+ base::string16 word_break =
+ base::ASCIIToUTF16(" ");
+ search_string_with_word_break.insert(cursor_position, word_break);
+
+ // Add to history_id_set the ids that are related to the original
+ // search string, but with the word break.
+ history_id_set_with_word_break =
+ HistoryItemsForWords(search_string_with_word_break);
Mark P 2016/08/10 17:46:19 This might do unnecessary work if the new search s
+
+ history_id_set_with_word_break_was_trimmed =
+ TrimCandidatePool (history_id_set_with_word_break);
+
+ scored_items_with_word_break = GetScoredItemsForSearchString(
+ search_string_with_word_break,
+ history_id_set_with_word_break,
+ max_matches,
+ bookmark_model,
+ template_url_service);
+ }
+
+ if (history_id_set_without_word_break_was_trimmed ||
+ history_id_set_with_word_break_was_trimmed) {
search_term_cache_.clear(); // Invalidate the term cache.
- } else {
+ }
+ else {
// Remove any stale SearchTermCacheItems.
for (SearchTermCacheMap::iterator cache_iter = search_term_cache_.begin();
cache_iter != search_term_cache_.end(); ) {
- if (!cache_iter->second.used_)
+ if (!cache_iter->second.used_) {
search_term_cache_.erase(cache_iter++);
- else
+ }
+ else {
++cache_iter;
+ }
}
}
- return scored_items;
+ // all_scored_items represents the union of the
+ // ScoredHistoryMatches for the search string with and without the word break.
Mark P 2016/08/10 17:46:19 The code below can be made more efficient and easi
Lavar Askew 2016/08/18 03:07:51 Done.
+ if (scored_items_without_word_break.size() > 0 &&
+ scored_items_with_word_break.size() > 0) {
+ post_scoring_item_count_ =
+ scored_items_without_word_break.size()
+ + scored_items_with_word_break.size();
+
+ all_scored_items.reserve(post_scoring_item_count_);
+ all_scored_items.insert(all_scored_items.end(),
+ scored_items_without_word_break.begin(),
+ scored_items_without_word_break.end());
+ all_scored_items.insert(all_scored_items.end(),
+ scored_items_with_word_break.begin(),
+ scored_items_with_word_break.end());
+ }
+ else if (scored_items_without_word_break.size() == 0 &&
+ scored_items_with_word_break.size() > 0) {
+ all_scored_items = scored_items_with_word_break;
+ }
+ else if (scored_items_without_word_break.size() > 0 &&
+ scored_items_with_word_break.size() == 0) {
+ all_scored_items = scored_items_without_word_break;
+ }
+
Mark P 2016/08/10 17:46:19 I could imagine situations where the items overlap
Lavar Askew 2016/08/18 03:07:51 Done.
+ return all_scored_items;
Mark P 2016/08/10 17:46:19 Also, the number of items we're supposed to return
Lavar Askew 2016/08/18 03:07:52 Done.
}
bool URLIndexPrivateData::UpdateURL(

Powered by Google App Engine
This is Rietveld 408576698