| Index: components/omnibox/browser/url_index_private_data.cc
|
| diff --git a/components/omnibox/browser/url_index_private_data.cc b/components/omnibox/browser/url_index_private_data.cc
|
| index 5e7f99c6cee99578326402f23d3e38fa1bb4d82d..38d82ea1d3a842cc4e4379c49263200e47b945f7 100644
|
| --- a/components/omnibox/browser/url_index_private_data.cc
|
| +++ b/components/omnibox/browser/url_index_private_data.cc
|
| @@ -140,7 +140,6 @@ void UpdateRecentVisitsFromHistoryDBTask::DoneRunOnMainThread() {
|
| UpdateRecentVisitsFromHistoryDBTask::~UpdateRecentVisitsFromHistoryDBTask() {
|
| }
|
|
|
| -
|
| // URLIndexPrivateData ---------------------------------------------------------
|
|
|
| URLIndexPrivateData::URLIndexPrivateData()
|
| @@ -152,7 +151,7 @@ URLIndexPrivateData::URLIndexPrivateData()
|
| }
|
|
|
| ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms(
|
| - base::string16 search_string,
|
| + base::string16 original_search_string,
|
| size_t cursor_position,
|
| size_t max_matches,
|
| bookmarks::BookmarkModel* bookmark_model,
|
| @@ -160,164 +159,107 @@ ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms(
|
| pre_filter_item_count_ = 0;
|
| post_filter_item_count_ = 0;
|
| post_scoring_item_count_ = 0;
|
| - // The search string we receive may contain escaped characters. For reducing
|
| - // the index we need individual, lower-cased words, ignoring escapings. For
|
| - // the final filtering we need whitespace separated substrings possibly
|
| - // containing escaped characters.
|
| - base::string16 lower_raw_string(base::i18n::ToLower(search_string));
|
| - base::string16 lower_unescaped_string =
|
| - net::UnescapeURLComponent(lower_raw_string,
|
| - net::UnescapeRule::SPACES | net::UnescapeRule::PATH_SEPARATORS |
|
| - net::UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS);
|
| -
|
| - // Extract individual 'words' (as opposed to 'terms'; see below) from the
|
| - // search string. When the user types "colspec=ID%20Mstone Release" we get
|
| - // four 'words': "colspec", "id", "mstone" and "release".
|
| - String16Vector lower_words(
|
| - String16VectorFromString16(lower_unescaped_string, false, nullptr));
|
| - ScoredHistoryMatches scored_items;
|
|
|
| - // Do nothing if we have indexed no words (probably because we've not been
|
| - // initialized yet) or the search string has no words.
|
| - if (word_list_.empty() || lower_words.empty()) {
|
| - search_term_cache_.clear(); // Invalidate the term cache.
|
| - return scored_items;
|
| + // This list will contain the original search string and any other string
|
| + // transformations.
|
| + String16Vector search_strings;
|
| + search_strings.push_back(original_search_string);
|
| + if ((cursor_position != base::string16::npos) &&
|
| + (cursor_position < original_search_string.length()) &&
|
| + (cursor_position > 0)) {
|
| + // The original search_string broken at cursor position. This is one type of
|
| + // transformation.
|
| + base::string16 transformed_search_string(original_search_string);
|
| + transformed_search_string.insert(cursor_position, base::ASCIIToUTF16(" "));
|
| + search_strings.push_back(transformed_search_string);
|
| }
|
|
|
| + ScoredHistoryMatches scored_items;
|
| + // Invalidate the term cache and return if we have indexed no words (probably
|
| + // because we've not been initialized yet).
|
| + if (word_list_.empty()) {
|
| + search_term_cache_.clear();
|
| + return scored_items;
|
| + }
|
| // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep
|
| // approach.
|
| ResetSearchTermCache();
|
|
|
| - HistoryIDSet history_id_set = HistoryIDSetFromWords(lower_words);
|
| -
|
| - // Trim the candidate pool if it is large. Note that we do not filter out
|
| - // items that do not contain the search terms as proper substrings -- doing
|
| - // so is the performance-costly operation we are trying to avoid in order
|
| - // to maintain omnibox responsiveness.
|
| - const size_t kItemsToScoreLimit = 500;
|
| - pre_filter_item_count_ = history_id_set.size();
|
| - // If we trim the results set we do not want to cache the results for next
|
| - // time as the user's ultimately desired result could easily be eliminated
|
| - // in this early rough filter.
|
| - bool was_trimmed = (pre_filter_item_count_ > kItemsToScoreLimit);
|
| - if (was_trimmed) {
|
| - HistoryIDVector history_ids;
|
| - std::copy(history_id_set.begin(), history_id_set.end(),
|
| - std::back_inserter(history_ids));
|
| - // Trim down the set by sorting by typed-count, visit-count, and last
|
| - // visit.
|
| - HistoryItemFactorGreater
|
| - item_factor_functor(history_info_map_);
|
| - std::partial_sort(history_ids.begin(),
|
| - history_ids.begin() + kItemsToScoreLimit,
|
| - history_ids.end(),
|
| - item_factor_functor);
|
| - history_id_set.clear();
|
| - std::copy(history_ids.begin(), history_ids.begin() + kItemsToScoreLimit,
|
| - std::inserter(history_id_set, history_id_set.end()));
|
| - post_filter_item_count_ = history_id_set.size();
|
| - }
|
| + for (const base::string16& search_string : search_strings) {
|
| + // The search string we receive may contain escaped characters. For reducing
|
| + // the index we need individual, lower-cased words, ignoring escapings. For
|
| + // the final filtering we need whitespace separated substrings possibly
|
| + // containing escaped characters.
|
| + base::string16 lower_raw_string(base::i18n::ToLower(search_string));
|
| + base::string16 lower_unescaped_string = net::UnescapeURLComponent(
|
| + lower_raw_string,
|
| + net::UnescapeRule::SPACES | net::UnescapeRule::PATH_SEPARATORS |
|
| + net::UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS);
|
| +
|
| + // Extract individual 'words' (as opposed to 'terms'; see below) from the
|
| + // search string. When the user types "colspec=ID%20Mstone Release" we get
|
| + // four 'words': "colspec", "id", "mstone" and "release".
|
| + String16Vector lower_words(
|
| + String16VectorFromString16(lower_unescaped_string, false, nullptr));
|
| + if (lower_words.empty())
|
| + continue;
|
| + HistoryIDSet history_id_set = HistoryIDSetFromWords(lower_words);
|
| + // Trim the candidate pool if it is large. Note that we do not filter out
|
| + // items that do not contain the search terms as proper substrings --
|
| + // doing so is the performance-costly operation we are trying to avoid in
|
| + // order to maintain omnibox responsiveness.
|
| + const size_t kItemsToScoreLimit = 500;
|
| + pre_filter_item_count_ += history_id_set.size();
|
| + // If we trim the results set we do not want to cache the results for next
|
| + // time as the user's ultimately desired result could easily be eliminated
|
| + // in this early rough filter.
|
| + if (pre_filter_item_count_ > kItemsToScoreLimit) {
|
| + HistoryIDVector history_ids;
|
| + std::copy(history_id_set.begin(), history_id_set.end(),
|
| + std::back_inserter(history_ids));
|
| + // Trim down the set by sorting by typed-count, visit-count, and last
|
| + // visit.
|
| + HistoryItemFactorGreater item_factor_functor(history_info_map_);
|
| + std::partial_sort(history_ids.begin(),
|
| + history_ids.begin() + kItemsToScoreLimit,
|
| + history_ids.end(), item_factor_functor);
|
| + history_id_set.clear();
|
| + std::copy(history_ids.begin(), history_ids.begin() + kItemsToScoreLimit,
|
| + std::inserter(history_id_set, history_id_set.end()));
|
| + post_filter_item_count_ = post_filter_item_count_ + history_id_set.size();
|
| + }
|
| + // Pass over all of the candidates filtering out any without a proper
|
| + // substring match, inserting those which pass in order by score. Note that
|
| + // in this step we are using the raw search string complete with escaped
|
| + // URL elements. When the user has specifically typed something akin to
|
| + // "sort=pri&colspec=ID%20Mstone%20Release" we want to make sure that that
|
| + // specific substring appears in the URL or page title.
|
| +
|
| + // We call these 'terms' (as opposed to 'words'; see above) as in this case
|
| + // we only want to break up the search string on 'true' whitespace rather
|
| + // than escaped whitespace. When the user types "colspec=ID%20Mstone
|
| + // Release" we get two 'terms': "colspec=id%20mstone" and "release".
|
| + String16Vector lower_raw_terms =
|
| + base::SplitString(lower_raw_string, base::kWhitespaceUTF16,
|
| + base::KEEP_WHITESPACE, base::SPLIT_WANT_NONEMPTY);
|
|
|
| - // Pass over all of the candidates filtering out any without a proper
|
| - // substring match, inserting those which pass in order by score. Note that
|
| - // in this step we are using the raw search string complete with escaped
|
| - // URL elements. When the user has specifically typed something akin to
|
| - // "sort=pri&colspec=ID%20Mstone%20Release" we want to make sure that that
|
| - // specific substring appears in the URL or page title.
|
| -
|
| - // We call these 'terms' (as opposed to 'words'; see above) as in this case
|
| - // we only want to break up the search string on 'true' whitespace rather than
|
| - // escaped whitespace. When the user types "colspec=ID%20Mstone Release" we
|
| - // get two 'terms': "colspec=id%20mstone" and "release".
|
| - String16Vector lower_raw_terms = base::SplitString(
|
| - lower_raw_string, base::kWhitespaceUTF16, base::KEEP_WHITESPACE,
|
| - base::SPLIT_WANT_NONEMPTY);
|
| -
|
| - if (lower_raw_terms.empty()) {
|
| // Don't score matches when there are no terms to score against. (It's
|
| // possible that the word break iterater that extracts words to search
|
| // for in the database allows some whitespace "words" whereas SplitString
|
| - // excludes a long list of whitespace.) One could write a scoring
|
| - // function that gives a reasonable order to matches when there
|
| - // are no terms (i.e., all the words are some form of whitespace),
|
| - // but this is such a rare edge case that it's not worth the time.
|
| - return scored_items;
|
| - }
|
| -
|
| - scored_items =
|
| - std::for_each(
|
| - history_id_set.begin(), history_id_set.end(),
|
| - AddHistoryMatch(bookmark_model, template_url_service, *this,
|
| - lower_raw_string, lower_raw_terms, base::Time::Now()))
|
| - .ScoredMatches();
|
| -
|
| - // If cursor position is set and useful (not at either end of the string),
|
| - // allow the search string to be broken at cursor position. We do this by
|
| - // pretending there's a space where the cursor is.
|
| - // This phenomena will be referred to as space ghosting.
|
| - // Going forward we perform a similar process for obtaining
|
| - // scored items as above. In fact all of the variable names are the same
|
| - // except that they are prepended with 'transformed_' reflecting that these
|
| - // variables are related to space ghosting.
|
| - if ((cursor_position != base::string16::npos) &&
|
| - (cursor_position < search_string.length()) && (cursor_position > 0)) {
|
| - // The original search_string broken at cursor position.
|
| - base::string16 transformed_search_string(search_string);
|
| - transformed_search_string.insert(cursor_position, base::ASCIIToUTF16(" "));
|
| -
|
| - // The same as lower_raw_string and defined for the same reasons.
|
| - base::string16 transformed_lower_raw_string(
|
| - base::i18n::ToLower(transformed_search_string));
|
| - String16Vector transformed_lower_raw_terms =
|
| - base::SplitString(transformed_lower_raw_string, base::kWhitespaceUTF16,
|
| - base::KEEP_WHITESPACE, base::SPLIT_WANT_NONEMPTY);
|
| - if (!transformed_lower_raw_terms.empty()) {
|
| - base::string16 transormed_lower_unescaped_string =
|
| - net::UnescapeURLComponent(
|
| - transformed_lower_raw_string,
|
| - net::UnescapeRule::SPACES | net::UnescapeRule::PATH_SEPARATORS |
|
| - net::UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS);
|
| - String16Vector transformed_lower_words(String16VectorFromString16(
|
| - transormed_lower_unescaped_string, false, nullptr));
|
| - HistoryIDSet transformed_history_id_set =
|
| - HistoryIDSetFromWords(transformed_lower_words);
|
| -
|
| - if (history_id_set != transformed_history_id_set) {
|
| - pre_filter_item_count_ = history_id_set.size();
|
| - // Trim the candidate pool if it is large as above.
|
| - was_trimmed = (pre_filter_item_count_ > kItemsToScoreLimit);
|
| - if (was_trimmed) {
|
| - HistoryIDVector transformed_history_ids;
|
| - std::copy(transformed_history_id_set.begin(),
|
| - transformed_history_id_set.end(),
|
| - std::back_inserter(transformed_history_ids));
|
| - // Trim down the set by sorting by typed-count, visit-count, and last
|
| - // visit.
|
| - HistoryItemFactorGreater item_factor_functor(history_info_map_);
|
| - std::partial_sort(
|
| - transformed_history_ids.begin(),
|
| - transformed_history_ids.begin() + kItemsToScoreLimit,
|
| - transformed_history_ids.end(), item_factor_functor);
|
| - transformed_history_id_set.clear();
|
| - std::copy(transformed_history_ids.begin(),
|
| - transformed_history_ids.begin() + kItemsToScoreLimit,
|
| - std::inserter(transformed_history_id_set,
|
| - transformed_history_id_set.end()));
|
| - }
|
| - history_id_set.insert(transformed_history_id_set.begin(),
|
| - transformed_history_id_set.end());
|
| - ScoredHistoryMatches transformed_scored_items =
|
| - std::for_each(
|
| - history_id_set.begin(), history_id_set.end(),
|
| - AddHistoryMatch(bookmark_model, template_url_service, *this,
|
| - transformed_lower_raw_string,
|
| - transformed_lower_raw_terms, base::Time::Now()))
|
| - .ScoredMatches();
|
| - scored_items.insert(scored_items.end(),
|
| - transformed_scored_items.begin(),
|
| - transformed_scored_items.end());
|
| - }
|
| - }
|
| + // excludes a long list of whitespace.) One could write a scoring function
|
| + // that gives a reasonable order to matches when there are no terms (i.e.,
|
| + // all the words are some form of whitespace), but this is such a rare edge
|
| + // case that it's not worth the time.
|
| + if (lower_raw_terms.empty())
|
| + continue;
|
| + ScoredHistoryMatches temp_scored_items =
|
| + std::for_each(history_id_set.begin(), history_id_set.end(),
|
| + AddHistoryMatch(bookmark_model, template_url_service,
|
| + *this, lower_raw_string, lower_raw_terms,
|
| + base::Time::Now()))
|
| + .ScoredMatches();
|
| + scored_items.insert(scored_items.end(), temp_scored_items.begin(),
|
| + temp_scored_items.end());
|
| }
|
| // Select and sort only the top |max_matches| results.
|
| if (scored_items.size() > max_matches) {
|
| @@ -330,8 +272,7 @@ ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms(
|
| ScoredHistoryMatch::MatchScoreGreater);
|
| }
|
| post_scoring_item_count_ = scored_items.size();
|
| -
|
| - if (was_trimmed) {
|
| + if (pre_filter_item_count_ > post_filter_item_count_) {
|
| search_term_cache_.clear(); // Invalidate the term cache.
|
| } else {
|
| // Remove any stale SearchTermCacheItems.
|
|
|