Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(43)

Side by Side Diff: components/omnibox/browser/url_index_private_data.cc

Issue 2842513006: Omnibox - HistoryQuick Provider - Don't Run Duplicate Searches (Closed)
Patch Set: add comment Created 3 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/omnibox/browser/url_index_private_data.h" 5 #include "components/omnibox/browser/url_index_private_data.h"
6 6
7 #include <stdint.h> 7 #include <stdint.h>
8 8
9 #include <functional> 9 #include <functional>
10 #include <iterator> 10 #include <iterator>
11 #include <limits> 11 #include <limits>
12 #include <numeric> 12 #include <numeric>
13 #include <set>
13 #include <string> 14 #include <string>
14 #include <vector> 15 #include <vector>
15 16
16 #include "base/files/file_util.h" 17 #include "base/files/file_util.h"
17 #include "base/i18n/break_iterator.h" 18 #include "base/i18n/break_iterator.h"
18 #include "base/i18n/case_conversion.h" 19 #include "base/i18n/case_conversion.h"
19 #include "base/macros.h" 20 #include "base/macros.h"
20 #include "base/metrics/histogram_macros.h" 21 #include "base/metrics/histogram_macros.h"
21 #include "base/strings/string_split.h" 22 #include "base/strings/string_split.h"
22 #include "base/strings/string_util.h" 23 #include "base/strings/string_util.h"
(...skipping 125 matching lines...) Expand 10 before | Expand all | Expand 10 after
148 bookmarks::BookmarkModel* bookmark_model, 149 bookmarks::BookmarkModel* bookmark_model,
149 TemplateURLService* template_url_service) { 150 TemplateURLService* template_url_service) {
150 // This list will contain the original search string and any other string 151 // This list will contain the original search string and any other string
151 // transformations. 152 // transformations.
152 String16Vector search_strings; 153 String16Vector search_strings;
153 search_strings.push_back(original_search_string); 154 search_strings.push_back(original_search_string);
154 if ((cursor_position != base::string16::npos) && 155 if ((cursor_position != base::string16::npos) &&
155 (cursor_position < original_search_string.length()) && 156 (cursor_position < original_search_string.length()) &&
156 (cursor_position > 0)) { 157 (cursor_position > 0)) {
157 // The original search_string broken at cursor position. This is one type of 158 // The original search_string broken at cursor position. This is one type of
158 // transformation. 159 // transformation. It's possible this transformation doesn't actually
160 // break any words. There's no harm in adding the transformation in this
161 // case because the searching code below prevents running duplicate
162 // searches.
159 base::string16 transformed_search_string(original_search_string); 163 base::string16 transformed_search_string(original_search_string);
160 transformed_search_string.insert(cursor_position, base::ASCIIToUTF16(" ")); 164 transformed_search_string.insert(cursor_position, base::ASCIIToUTF16(" "));
161 search_strings.push_back(transformed_search_string); 165 search_strings.push_back(transformed_search_string);
162 } 166 }
163 167
164 ScoredHistoryMatches scored_items; 168 ScoredHistoryMatches scored_items;
165 // Invalidate the term cache and return if we have indexed no words (probably 169 // Invalidate the term cache and return if we have indexed no words (probably
166 // because we've not been initialized yet). 170 // because we've not been initialized yet).
167 if (word_list_.empty()) { 171 if (word_list_.empty()) {
168 search_term_cache_.clear(); 172 search_term_cache_.clear();
169 return scored_items; 173 return scored_items;
170 } 174 }
171 // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep 175 // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep
172 // approach. 176 // approach.
173 ResetSearchTermCache(); 177 ResetSearchTermCache();
174 178
175 bool history_ids_were_trimmed = false; 179 bool history_ids_were_trimmed = false;
180 // A set containing the list of words extracted from each search string,
181 // used to prevent running duplicate searches.
182 std::set<String16Vector> search_string_words;
176 for (const base::string16& search_string : search_strings) { 183 for (const base::string16& search_string : search_strings) {
177 // The search string we receive may contain escaped characters. For reducing 184 // The search string we receive may contain escaped characters. For reducing
178 // the index we need individual, lower-cased words, ignoring escapings. For 185 // the index we need individual, lower-cased words, ignoring escapings. For
179 // the final filtering we need whitespace separated substrings possibly 186 // the final filtering we need whitespace separated substrings possibly
180 // containing escaped characters. 187 // containing escaped characters.
181 base::string16 lower_raw_string(base::i18n::ToLower(search_string)); 188 base::string16 lower_raw_string(base::i18n::ToLower(search_string));
182 base::string16 lower_unescaped_string = net::UnescapeURLComponent( 189 base::string16 lower_unescaped_string = net::UnescapeURLComponent(
183 lower_raw_string, 190 lower_raw_string,
184 net::UnescapeRule::SPACES | net::UnescapeRule::PATH_SEPARATORS | 191 net::UnescapeRule::SPACES | net::UnescapeRule::PATH_SEPARATORS |
185 net::UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS); 192 net::UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS);
186 193
187 // Extract individual 'words' (as opposed to 'terms'; see comment in 194 // Extract individual 'words' (as opposed to 'terms'; see comment in
188 // HistoryIdsToScoredMatches()) from the search string. When the user types 195 // HistoryIdsToScoredMatches()) from the search string. When the user types
189 // "colspec=ID%20Mstone Release" we get four 'words': "colspec", "id", 196 // "colspec=ID%20Mstone Release" we get four 'words': "colspec", "id",
190 // "mstone" and "release". 197 // "mstone" and "release".
191 String16Vector lower_words( 198 String16Vector lower_words(
192 String16VectorFromString16(lower_unescaped_string, false, nullptr)); 199 String16VectorFromString16(lower_unescaped_string, false, nullptr));
193 if (lower_words.empty()) 200 if (lower_words.empty())
194 continue; 201 continue;
202 // If we've already searched for this list of words, don't do it again.
203 if (search_string_words.find(lower_words) != search_string_words.end())
204 continue;
205 search_string_words.insert(lower_words);
195 206
196 HistoryIDVector history_ids = HistoryIDsFromWords(lower_words); 207 HistoryIDVector history_ids = HistoryIDsFromWords(lower_words);
197 history_ids_were_trimmed |= TrimHistoryIdsPool(&history_ids); 208 history_ids_were_trimmed |= TrimHistoryIdsPool(&history_ids);
198 209
199 HistoryIdsToScoredMatches(std::move(history_ids), lower_raw_string, 210 HistoryIdsToScoredMatches(std::move(history_ids), lower_raw_string,
200 template_url_service, bookmark_model, 211 template_url_service, bookmark_model,
201 &scored_items); 212 &scored_items);
202 } 213 }
203 // Select and sort only the top |max_matches| results. 214 // Select and sort only the top |max_matches| results.
204 if (scored_items.size() > max_matches) { 215 if (scored_items.size() > max_matches) {
(...skipping 1061 matching lines...) Expand 10 before | Expand all | Expand 10 after
1266 // First cut: typed count, visit count, recency. 1277 // First cut: typed count, visit count, recency.
1267 // TODO(mrossetti): This is too simplistic. Consider an approach which ranks 1278 // TODO(mrossetti): This is too simplistic. Consider an approach which ranks
1268 // recently visited (within the last 12/24 hours) as highly important. Get 1279 // recently visited (within the last 12/24 hours) as highly important. Get
1269 // input from mpearson. 1280 // input from mpearson.
1270 if (r1.typed_count() != r2.typed_count()) 1281 if (r1.typed_count() != r2.typed_count())
1271 return (r1.typed_count() > r2.typed_count()); 1282 return (r1.typed_count() > r2.typed_count());
1272 if (r1.visit_count() != r2.visit_count()) 1283 if (r1.visit_count() != r2.visit_count())
1273 return (r1.visit_count() > r2.visit_count()); 1284 return (r1.visit_count() > r2.visit_count());
1274 return (r1.last_visit() > r2.last_visit()); 1285 return (r1.last_visit() > r2.last_visit());
1275 } 1286 }
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698