Chromium Code Reviews| Index: components/omnibox/browser/url_index_private_data.cc |
| diff --git a/components/omnibox/browser/url_index_private_data.cc b/components/omnibox/browser/url_index_private_data.cc |
| index 54123a335ca874ae527c5babe079cf242c08fc5e..5ef4d83d2c6d23d392c0adf54e62f19cceeb5334 100644 |
| --- a/components/omnibox/browser/url_index_private_data.cc |
| +++ b/components/omnibox/browser/url_index_private_data.cc |
| @@ -81,6 +81,9 @@ bool LengthGreater(const base::string16& string_a, |
| return string_a.length() > string_b.length(); |
| } |
| +bool Less(const base::string16& string_a, const base::string16& string_b) { |
| + return (string_a.compare(string_b) <= 0); |
| +} |
| // UpdateRecentVisitsFromHistoryDBTask ----------------------------------------- |
| @@ -152,79 +155,42 @@ URLIndexPrivateData::URLIndexPrivateData() |
| post_scoring_item_count_(0) { |
| } |
| -ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms( |
| - base::string16 search_string, |
| - size_t cursor_position, |
| - size_t max_matches, |
| - bookmarks::BookmarkModel* bookmark_model, |
| - TemplateURLService* template_url_service) { |
| - // If cursor position is set and useful (not at either end of the |
| - // string), allow the search string to be broken at cursor position. |
| - // We do this by pretending there's a space where the cursor is. |
| - if ((cursor_position != base::string16::npos) && |
| - (cursor_position < search_string.length()) && |
| - (cursor_position > 0)) { |
| - search_string.insert(cursor_position, base::ASCIIToUTF16(" ")); |
| - } |
| - pre_filter_item_count_ = 0; |
| - post_filter_item_count_ = 0; |
| - post_scoring_item_count_ = 0; |
| +String16Vector URLIndexPrivateData::ExtractIndividualWordVector( |
| + const base::string16& search_string) { |
| // The search string we receive may contain escaped characters. For reducing |
| // the index we need individual, lower-cased words, ignoring escapings. For |
| // the final filtering we need whitespace separated substrings possibly |
| // containing escaped characters. |
| base::string16 lower_raw_string(base::i18n::ToLower(search_string)); |
| - base::string16 lower_unescaped_string = |
| - net::UnescapeURLComponent(lower_raw_string, |
| - net::UnescapeRule::SPACES | net::UnescapeRule::PATH_SEPARATORS | |
| + base::string16 lower_unescaped_string = net::UnescapeURLComponent( |
| + lower_raw_string, |
| + net::UnescapeRule::SPACES | net::UnescapeRule::PATH_SEPARATORS | |
| net::UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS); |
| // Extract individual 'words' (as opposed to 'terms'; see below) from the |
| // search string. When the user types "colspec=ID%20Mstone Release" we get |
| // four 'words': "colspec", "id", "mstone" and "release". |
| - String16Vector lower_words( |
| - String16VectorFromString16(lower_unescaped_string, false, nullptr)); |
| - ScoredHistoryMatches scored_items; |
| + return String16VectorFromString16(lower_unescaped_string, false, nullptr); |
| +} |
| +HistoryIDSet URLIndexPrivateData::GetHistoryIDSet( |
| + const String16Vector& lower_words) { |
| // Do nothing if we have indexed no words (probably because we've not been |
| // initialized yet) or the search string has no words. |
| if (word_list_.empty() || lower_words.empty()) { |
| search_term_cache_.clear(); // Invalidate the term cache. |
| - return scored_items; |
| + return HistoryIDSet(); |
| } |
| - // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep |
| - // approach. |
| - ResetSearchTermCache(); |
| - |
| - HistoryIDSet history_id_set = HistoryIDSetFromWords(lower_words); |
| + return HistoryIDSetFromWords(lower_words); |
| +} |
| - // Trim the candidate pool if it is large. Note that we do not filter out |
| - // items that do not contain the search terms as proper substrings -- doing |
| - // so is the performance-costly operation we are trying to avoid in order |
| - // to maintain omnibox responsiveness. |
| - const size_t kItemsToScoreLimit = 500; |
| - pre_filter_item_count_ = history_id_set.size(); |
| - // If we trim the results set we do not want to cache the results for next |
| - // time as the user's ultimately desired result could easily be eliminated |
| - // in this early rough filter. |
| - bool was_trimmed = (pre_filter_item_count_ > kItemsToScoreLimit); |
| - if (was_trimmed) { |
| - HistoryIDVector history_ids; |
| - std::copy(history_id_set.begin(), history_id_set.end(), |
| - std::back_inserter(history_ids)); |
| - // Trim down the set by sorting by typed-count, visit-count, and last |
| - // visit. |
| - HistoryItemFactorGreater |
| - item_factor_functor(history_info_map_); |
| - std::partial_sort(history_ids.begin(), |
| - history_ids.begin() + kItemsToScoreLimit, |
| - history_ids.end(), |
| - item_factor_functor); |
| - history_id_set.clear(); |
| - std::copy(history_ids.begin(), history_ids.begin() + kItemsToScoreLimit, |
| - std::inserter(history_id_set, history_id_set.end())); |
| - post_filter_item_count_ = history_id_set.size(); |
| - } |
| +ScoredHistoryMatches URLIndexPrivateData::GetScoredItemsForSearchString( |
| + const base::string16& search_string, |
| + const HistoryIDSet& history_id_set, |
| + const size_t& max_matches, |
| + bookmarks::BookmarkModel* bookmark_model, |
| + TemplateURLService* template_url_service) { |
| + ScoredHistoryMatches scored_items; |
| // Pass over all of the candidates filtering out any without a proper |
| // substring match, inserting those which pass in order by score. Note that |
| @@ -237,9 +203,11 @@ ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms( |
| // we only want to break up the search string on 'true' whitespace rather than |
| // escaped whitespace. When the user types "colspec=ID%20Mstone Release" we |
| // get two 'terms': "colspec=id%20mstone" and "release". |
| + base::string16 lower_raw_string(base::i18n::ToLower(search_string)); |
| String16Vector lower_raw_terms = base::SplitString( |
| lower_raw_string, base::kWhitespaceUTF16, base::KEEP_WHITESPACE, |
| base::SPLIT_WANT_NONEMPTY); |
| + |
| if (lower_raw_terms.empty()) { |
| // Don't score matches when there are no terms to score against. (It's |
| // possible that the word break iterater that extracts words to search |
| @@ -258,18 +226,100 @@ ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms( |
| base::Time::Now())).ScoredMatches(); |
| // Select and sort only the top |max_matches| results. |
| + |
|
Mark P
2016/08/18 19:36:53
nit: don't add a blank line
Lavar Askew
2016/08/19 04:46:07
Done.
|
| if (scored_items.size() > max_matches) { |
| std::partial_sort(scored_items.begin(), |
| scored_items.begin() + |
| max_matches, |
| scored_items.end(), |
| ScoredHistoryMatch::MatchScoreGreater); |
| - scored_items.resize(max_matches); |
| + scored_items.resize(max_matches); |
| } else { |
| std::sort(scored_items.begin(), scored_items.end(), |
| ScoredHistoryMatch::MatchScoreGreater); |
| } |
| - post_scoring_item_count_ = scored_items.size(); |
| + |
| + return scored_items; |
| +} |
| + |
| +ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms( |
| + base::string16 search_string, |
| + size_t cursor_position, |
| + size_t max_matches, |
| + bookmarks::BookmarkModel* bookmark_model, |
| + TemplateURLService* template_url_service) { |
| + pre_filter_item_count_ = 0; |
| + post_filter_item_count_ = 0; |
| + post_scoring_item_count_ = 0; |
| + |
| + // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep |
| + // approach. |
| + ResetSearchTermCache(); |
| + |
| + String16Vector lower_words = ExtractIndividualWordVector(search_string); |
| + HistoryIDSet history_id_set = GetHistoryIDSet(lower_words); |
| + |
| + // If cursor position is set and useful (not at either end of the |
| + // string), allow the search string to be broken at cursor position. |
| + // We do this by pretending there's a space where the cursor is. |
| + if ((cursor_position != base::string16::npos) && |
| + (cursor_position < search_string.length()) && (cursor_position > 0)) { |
| + base::string16 search_string_with_word_break(search_string); |
| + HistoryIDSet history_id_set_with_word_break; |
|
Mark P
2016/08/18 19:36:53
This declaration doesn't need to be at this level;
Lavar Askew
2016/08/19 04:46:07
Can't believe I missed that...Done.
|
| + |
| + base::string16 word_break = base::ASCIIToUTF16(" "); |
| + search_string_with_word_break.insert(cursor_position, word_break); |
| + |
| + // Add to history_id_set the ids that are related to the original |
| + // search string, but with the word break. |
| + String16Vector lower_words_with_word_break = |
| + ExtractIndividualWordVector(search_string_with_word_break); |
| + |
| + // check to see if lower_words and lower_words_with_word_break |
|
Mark P
2016/08/18 19:36:53
nit: start sentence with capital letter ("Check"..
Lavar Askew
2016/08/19 04:46:07
Done.
|
| + // are the same size. If they are then this could indicate that |
| + // the word vectors contain the same words |
| + if (lower_words.size() != lower_words_with_word_break.size()) { |
| + std::sort(lower_words.begin(), lower_words.end(), Less); |
| + std::sort(lower_words_with_word_break.begin(), |
| + lower_words_with_word_break.end(), Less); |
| + |
|
Mark P
2016/08/18 19:36:53
nit: no need for this blank line I think
|
| + if (lower_words != lower_words_with_word_break) { |
| + history_id_set_with_word_break = |
| + GetHistoryIDSet(lower_words_with_word_break); |
| + |
|
Mark P
2016/08/18 19:36:53
nit: no need for this blank line I think
|
| + history_id_set.insert(history_id_set_with_word_break.begin(), |
| + history_id_set_with_word_break.end()); |
| + } |
| + } |
| + } |
| + |
| + // Trim the candidate pool if it is large. Note that we do not filter out |
| + // items that do not contain the search terms as proper substrings -- doing |
| + // so is the performance-costly operation we are trying to avoid in order |
| + // to maintain omnibox responsiveness. |
| + pre_filter_item_count_ = history_id_set.size(); |
| + // If we trim the results set we do not want to cache the results for next |
| + // time as the user's ultimately desired result could easily be eliminated |
| + // in this early rough filter. |
| + bool was_trimmed = (pre_filter_item_count_ > max_matches); |
| + if (was_trimmed) { |
| + HistoryIDVector history_ids; |
| + std::copy(history_id_set.begin(), history_id_set.end(), |
| + std::back_inserter(history_ids)); |
| + // Trim down the set by sorting by typed-count, visit-count, and last |
| + // visit. |
| + HistoryItemFactorGreater item_factor_functor(history_info_map_); |
| + std::partial_sort(history_ids.begin(), history_ids.begin() + max_matches, |
| + history_ids.end(), item_factor_functor); |
| + history_id_set.clear(); |
| + std::copy(history_ids.begin(), history_ids.begin() + max_matches, |
| + std::inserter(history_id_set, history_id_set.end())); |
| + post_filter_item_count_ = history_id_set.size(); |
| + } |
| + |
| + ScoredHistoryMatches scored_items = |
| + GetScoredItemsForSearchString(search_string, history_id_set, max_matches, |
| + bookmark_model, template_url_service); |
| if (was_trimmed) { |
| search_term_cache_.clear(); // Invalidate the term cache. |
| @@ -277,10 +327,11 @@ ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms( |
| // Remove any stale SearchTermCacheItems. |
| for (SearchTermCacheMap::iterator cache_iter = search_term_cache_.begin(); |
| cache_iter != search_term_cache_.end(); ) { |
| - if (!cache_iter->second.used_) |
| + if (!cache_iter->second.used_) { |
| search_term_cache_.erase(cache_iter++); |
| - else |
| + } else { |
| ++cache_iter; |
| + } |
| } |
| } |