Index: components/omnibox/browser/url_index_private_data.cc |
diff --git a/components/omnibox/browser/url_index_private_data.cc b/components/omnibox/browser/url_index_private_data.cc |
index 54123a335ca874ae527c5babe079cf242c08fc5e..68e06c8622edd1e9ba1c961a7930895c14fe6406 100644 |
--- a/components/omnibox/browser/url_index_private_data.cc |
+++ b/components/omnibox/browser/url_index_private_data.cc |
@@ -81,6 +81,9 @@ bool LengthGreater(const base::string16& string_a, |
return string_a.length() > string_b.length(); |
} |
+bool Less(const base::string16& string_a, const base::string16& string_b) { |
+ return (string_a.compare(string_b) <= 0); |
+} |
// UpdateRecentVisitsFromHistoryDBTask ----------------------------------------- |
@@ -152,79 +155,42 @@ URLIndexPrivateData::URLIndexPrivateData() |
post_scoring_item_count_(0) { |
} |
-ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms( |
- base::string16 search_string, |
- size_t cursor_position, |
- size_t max_matches, |
- bookmarks::BookmarkModel* bookmark_model, |
- TemplateURLService* template_url_service) { |
- // If cursor position is set and useful (not at either end of the |
- // string), allow the search string to be broken at cursor position. |
- // We do this by pretending there's a space where the cursor is. |
- if ((cursor_position != base::string16::npos) && |
- (cursor_position < search_string.length()) && |
- (cursor_position > 0)) { |
- search_string.insert(cursor_position, base::ASCIIToUTF16(" ")); |
- } |
- pre_filter_item_count_ = 0; |
- post_filter_item_count_ = 0; |
- post_scoring_item_count_ = 0; |
+String16Vector URLIndexPrivateData::ExtractIndividualWordVector( |
+ const base::string16& search_string) { |
// The search string we receive may contain escaped characters. For reducing |
// the index we need individual, lower-cased words, ignoring escapings. For |
// the final filtering we need whitespace separated substrings possibly |
// containing escaped characters. |
base::string16 lower_raw_string(base::i18n::ToLower(search_string)); |
- base::string16 lower_unescaped_string = |
- net::UnescapeURLComponent(lower_raw_string, |
- net::UnescapeRule::SPACES | net::UnescapeRule::PATH_SEPARATORS | |
+ base::string16 lower_unescaped_string = net::UnescapeURLComponent( |
+ lower_raw_string, |
+ net::UnescapeRule::SPACES | net::UnescapeRule::PATH_SEPARATORS | |
net::UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS); |
// Extract individual 'words' (as opposed to 'terms'; see below) from the |
// search string. When the user types "colspec=ID%20Mstone Release" we get |
// four 'words': "colspec", "id", "mstone" and "release". |
- String16Vector lower_words( |
- String16VectorFromString16(lower_unescaped_string, false, nullptr)); |
- ScoredHistoryMatches scored_items; |
+ return String16VectorFromString16(lower_unescaped_string, false, nullptr); |
+} |
+HistoryIDSet URLIndexPrivateData::GetHistoryIDSet( |
+ const String16Vector& lower_words) { |
// Do nothing if we have indexed no words (probably because we've not been |
// initialized yet) or the search string has no words. |
if (word_list_.empty() || lower_words.empty()) { |
search_term_cache_.clear(); // Invalidate the term cache. |
- return scored_items; |
+ return HistoryIDSet(); |
} |
- // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep |
- // approach. |
- ResetSearchTermCache(); |
- |
- HistoryIDSet history_id_set = HistoryIDSetFromWords(lower_words); |
+ return HistoryIDSetFromWords(lower_words); |
+} |
- // Trim the candidate pool if it is large. Note that we do not filter out |
- // items that do not contain the search terms as proper substrings -- doing |
- // so is the performance-costly operation we are trying to avoid in order |
- // to maintain omnibox responsiveness. |
- const size_t kItemsToScoreLimit = 500; |
- pre_filter_item_count_ = history_id_set.size(); |
- // If we trim the results set we do not want to cache the results for next |
- // time as the user's ultimately desired result could easily be eliminated |
- // in this early rough filter. |
- bool was_trimmed = (pre_filter_item_count_ > kItemsToScoreLimit); |
- if (was_trimmed) { |
- HistoryIDVector history_ids; |
- std::copy(history_id_set.begin(), history_id_set.end(), |
- std::back_inserter(history_ids)); |
- // Trim down the set by sorting by typed-count, visit-count, and last |
- // visit. |
- HistoryItemFactorGreater |
- item_factor_functor(history_info_map_); |
- std::partial_sort(history_ids.begin(), |
- history_ids.begin() + kItemsToScoreLimit, |
- history_ids.end(), |
- item_factor_functor); |
- history_id_set.clear(); |
- std::copy(history_ids.begin(), history_ids.begin() + kItemsToScoreLimit, |
- std::inserter(history_id_set, history_id_set.end())); |
- post_filter_item_count_ = history_id_set.size(); |
- } |
+ScoredHistoryMatches URLIndexPrivateData::GetScoredItemsForSearchString( |
Mark P
2016/08/23 22:14:46
Not quite right. The .h file order is fine. Now
Lavar Askew
2016/08/24 19:53:14
Done.
|
+ const base::string16& search_string, |
+ const HistoryIDSet& history_id_set, |
+ const size_t max_matches, |
+ bookmarks::BookmarkModel* bookmark_model, |
+ TemplateURLService* template_url_service) { |
+ ScoredHistoryMatches scored_items; |
// Pass over all of the candidates filtering out any without a proper |
// substring match, inserting those which pass in order by score. Note that |
@@ -237,9 +203,11 @@ ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms( |
// we only want to break up the search string on 'true' whitespace rather than |
// escaped whitespace. When the user types "colspec=ID%20Mstone Release" we |
// get two 'terms': "colspec=id%20mstone" and "release". |
+ base::string16 lower_raw_string(base::i18n::ToLower(search_string)); |
String16Vector lower_raw_terms = base::SplitString( |
lower_raw_string, base::kWhitespaceUTF16, base::KEEP_WHITESPACE, |
base::SPLIT_WANT_NONEMPTY); |
+ |
if (lower_raw_terms.empty()) { |
// Don't score matches when there are no terms to score against. (It's |
// possible that the word break iterater that extracts words to search |
@@ -264,26 +232,101 @@ ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms( |
max_matches, |
scored_items.end(), |
ScoredHistoryMatch::MatchScoreGreater); |
- scored_items.resize(max_matches); |
+ scored_items.resize(max_matches); |
} else { |
std::sort(scored_items.begin(), scored_items.end(), |
ScoredHistoryMatch::MatchScoreGreater); |
} |
- post_scoring_item_count_ = scored_items.size(); |
+ return scored_items; |
+} |
+ |
+ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms( |
+ base::string16 search_string, |
+ size_t cursor_position, |
+ size_t max_matches, |
+ bookmarks::BookmarkModel* bookmark_model, |
+ TemplateURLService* template_url_service) { |
+ pre_filter_item_count_ = 0; |
+ post_filter_item_count_ = 0; |
+ post_scoring_item_count_ = 0; |
+ |
+ // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep |
+ // approach. |
+ ResetSearchTermCache(); |
+ |
+ String16Vector lower_words = ExtractIndividualWordVector(search_string); |
+ HistoryIDSet history_id_set = GetHistoryIDSet(lower_words); |
+ |
+ // If cursor position is set and useful (not at either end of the |
+ // string), allow the search string to be broken at cursor position. |
+ // We do this by pretending there's a space where the cursor is. |
+ if ((cursor_position != base::string16::npos) && |
+ (cursor_position < search_string.length()) && (cursor_position > 0)) { |
+ base::string16 search_string_with_word_break(search_string); |
+ |
+ base::string16 word_break = base::ASCIIToUTF16(" "); |
+ search_string_with_word_break.insert(cursor_position, word_break); |
+ |
+ // Add to history_id_set the ids that are related to the original search |
+ // string, but with the word break. |
+ String16Vector lower_words_with_word_break = |
+ ExtractIndividualWordVector(search_string_with_word_break); |
+ |
+ // Check to see if lower_words and lower_words_with_word_break are the same |
+ // size. If they are then this could indicate that the word vectors |
+ // contain the same words. |
+ if (lower_words.size() != lower_words_with_word_break.size()) { |
+ std::sort(lower_words.begin(), lower_words.end(), Less); |
+ std::sort(lower_words_with_word_break.begin(), |
+ lower_words_with_word_break.end(), Less); |
+ if (lower_words != lower_words_with_word_break) { |
+ HistoryIDSet history_id_set_with_word_break = |
+ GetHistoryIDSet(lower_words_with_word_break); |
+ history_id_set.insert(history_id_set_with_word_break.begin(), |
+ history_id_set_with_word_break.end()); |
+ } |
+ } |
+ } |
+ // Trim the candidate pool if it is large. Note that we do not filter out |
+ // items that do not contain the search terms as proper substrings -- doing |
+ // so is the performance-costly operation we are trying to avoid in order |
+ // to maintain omnibox responsiveness. |
+ pre_filter_item_count_ = history_id_set.size(); |
+ // If we trim the results set we do not want to cache the results for next |
+ // time as the user's ultimately desired result could easily be eliminated |
+ // in this early rough filter. |
+ bool was_trimmed = (pre_filter_item_count_ > max_matches); |
+ if (was_trimmed) { |
+ HistoryIDVector history_ids; |
+ std::copy(history_id_set.begin(), history_id_set.end(), |
+ std::back_inserter(history_ids)); |
+ // Trim down the set by sorting by typed-count, visit-count, and last |
+ // visit. |
+ HistoryItemFactorGreater item_factor_functor(history_info_map_); |
+ std::partial_sort(history_ids.begin(), history_ids.begin() + max_matches, |
+ history_ids.end(), item_factor_functor); |
+ history_id_set.clear(); |
+ std::copy(history_ids.begin(), history_ids.begin() + max_matches, |
+ std::inserter(history_id_set, history_id_set.end())); |
+ post_filter_item_count_ = history_id_set.size(); |
+ } |
+ ScoredHistoryMatches scored_items = |
+ GetScoredItemsForSearchString(search_string, history_id_set, max_matches, |
+ bookmark_model, template_url_service); |
if (was_trimmed) { |
search_term_cache_.clear(); // Invalidate the term cache. |
} else { |
// Remove any stale SearchTermCacheItems. |
for (SearchTermCacheMap::iterator cache_iter = search_term_cache_.begin(); |
cache_iter != search_term_cache_.end(); ) { |
- if (!cache_iter->second.used_) |
+ if (!cache_iter->second.used_) { |
search_term_cache_.erase(cache_iter++); |
- else |
+ } else { |
++cache_iter; |
+ } |
} |
} |
- |
return scored_items; |
} |