Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(27)

Side by Side Diff: components/omnibox/browser/url_index_private_data.cc

Issue 2842513006: Omnibox - HistoryQuick Provider - Don't Run Duplicate Searches (Closed)
Patch Set: use set of vectors instead Created 3 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/omnibox/browser/url_index_private_data.h" 5 #include "components/omnibox/browser/url_index_private_data.h"
6 6
7 #include <stdint.h> 7 #include <stdint.h>
8 8
9 #include <functional> 9 #include <functional>
10 #include <iterator> 10 #include <iterator>
11 #include <limits> 11 #include <limits>
12 #include <numeric> 12 #include <numeric>
13 #include <set>
13 #include <string> 14 #include <string>
14 #include <vector> 15 #include <vector>
15 16
16 #include "base/files/file_util.h" 17 #include "base/files/file_util.h"
17 #include "base/i18n/break_iterator.h" 18 #include "base/i18n/break_iterator.h"
18 #include "base/i18n/case_conversion.h" 19 #include "base/i18n/case_conversion.h"
19 #include "base/macros.h" 20 #include "base/macros.h"
20 #include "base/metrics/histogram_macros.h" 21 #include "base/metrics/histogram_macros.h"
21 #include "base/strings/string_split.h" 22 #include "base/strings/string_split.h"
22 #include "base/strings/string_util.h" 23 #include "base/strings/string_util.h"
(...skipping 118 matching lines...) Expand 10 before | Expand all | Expand 10 after
141 : restored_cache_version_(0), 142 : restored_cache_version_(0),
142 saved_cache_version_(kCurrentCacheFileVersion) {} 143 saved_cache_version_(kCurrentCacheFileVersion) {}
143 144
144 ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms( 145 ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms(
145 base::string16 original_search_string, 146 base::string16 original_search_string,
146 size_t cursor_position, 147 size_t cursor_position,
147 size_t max_matches, 148 size_t max_matches,
148 bookmarks::BookmarkModel* bookmark_model, 149 bookmarks::BookmarkModel* bookmark_model,
149 TemplateURLService* template_url_service) { 150 TemplateURLService* template_url_service) {
150 // This list will contain the original search string and any other string 151 // This list will contain the original search string and any other string
151 // transformations. 152 // transformations.
Peter Kasting 2017/05/05 23:07:34 This comment sounds as if we envision many differe
Mark P 2017/05/08 22:46:19 One obvious one that comes to mind is rewriting 's
152 String16Vector search_strings; 153 String16Vector search_strings;
153 search_strings.push_back(original_search_string); 154 search_strings.push_back(original_search_string);
154 if ((cursor_position != base::string16::npos) && 155 if ((cursor_position != base::string16::npos) &&
155 (cursor_position < original_search_string.length()) && 156 (cursor_position < original_search_string.length()) &&
156 (cursor_position > 0)) { 157 (cursor_position > 0)) {
157 // The original search_string broken at cursor position. This is one type of 158 // The original search_string broken at cursor position. This is one type of
158 // transformation. 159 // transformation.
Peter Kasting 2017/05/05 23:07:34 Nit: It might be nice, either here or below, to co
Mark P 2017/05/08 22:46:19 Done.
159 base::string16 transformed_search_string(original_search_string); 160 base::string16 transformed_search_string(original_search_string);
160 transformed_search_string.insert(cursor_position, base::ASCIIToUTF16(" ")); 161 transformed_search_string.insert(cursor_position, base::ASCIIToUTF16(" "));
161 search_strings.push_back(transformed_search_string); 162 search_strings.push_back(transformed_search_string);
162 } 163 }
163 164
164 ScoredHistoryMatches scored_items; 165 ScoredHistoryMatches scored_items;
165 // Invalidate the term cache and return if we have indexed no words (probably 166 // Invalidate the term cache and return if we have indexed no words (probably
166 // because we've not been initialized yet). 167 // because we've not been initialized yet).
167 if (word_list_.empty()) { 168 if (word_list_.empty()) {
168 search_term_cache_.clear(); 169 search_term_cache_.clear();
169 return scored_items; 170 return scored_items;
170 } 171 }
171 // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep 172 // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep
172 // approach. 173 // approach.
173 ResetSearchTermCache(); 174 ResetSearchTermCache();
174 175
175 bool history_ids_were_trimmed = false; 176 bool history_ids_were_trimmed = false;
177 // A set containing the list of words extracted from each search string,
178 // used to prevent running duplicate searches.
179 std::set<String16Vector> search_string_words;
176 for (const base::string16& search_string : search_strings) { 180 for (const base::string16& search_string : search_strings) {
177 // The search string we receive may contain escaped characters. For reducing 181 // The search string we receive may contain escaped characters. For reducing
178 // the index we need individual, lower-cased words, ignoring escapings. For 182 // the index we need individual, lower-cased words, ignoring escapings. For
179 // the final filtering we need whitespace separated substrings possibly 183 // the final filtering we need whitespace separated substrings possibly
180 // containing escaped characters. 184 // containing escaped characters.
181 base::string16 lower_raw_string(base::i18n::ToLower(search_string)); 185 base::string16 lower_raw_string(base::i18n::ToLower(search_string));
182 base::string16 lower_unescaped_string = net::UnescapeURLComponent( 186 base::string16 lower_unescaped_string = net::UnescapeURLComponent(
183 lower_raw_string, 187 lower_raw_string,
184 net::UnescapeRule::SPACES | net::UnescapeRule::PATH_SEPARATORS | 188 net::UnescapeRule::SPACES | net::UnescapeRule::PATH_SEPARATORS |
185 net::UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS); 189 net::UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS);
186 190
187 // Extract individual 'words' (as opposed to 'terms'; see comment in 191 // Extract individual 'words' (as opposed to 'terms'; see comment in
188 // HistoryIdsToScoredMatches()) from the search string. When the user types 192 // HistoryIdsToScoredMatches()) from the search string. When the user types
189 // "colspec=ID%20Mstone Release" we get four 'words': "colspec", "id", 193 // "colspec=ID%20Mstone Release" we get four 'words': "colspec", "id",
190 // "mstone" and "release". 194 // "mstone" and "release".
191 String16Vector lower_words( 195 String16Vector lower_words(
192 String16VectorFromString16(lower_unescaped_string, false, nullptr)); 196 String16VectorFromString16(lower_unescaped_string, false, nullptr));
193 if (lower_words.empty()) 197 if (lower_words.empty())
194 continue; 198 continue;
199 // If we've already searched for this list of words, don't do it again.
200 if (search_string_words.find(lower_words) != search_string_words.end())
Peter Kasting 2017/05/05 23:07:34 Nit: Or base::ContainsKey(search_string_words, low
Mark P 2017/05/08 22:46:19 Left as-is. I find the current code a bit more re
201 continue;
202 search_string_words.insert(lower_words);
195 203
196 HistoryIDVector history_ids = HistoryIDsFromWords(lower_words); 204 HistoryIDVector history_ids = HistoryIDsFromWords(lower_words);
197 history_ids_were_trimmed |= TrimHistoryIdsPool(&history_ids); 205 history_ids_were_trimmed |= TrimHistoryIdsPool(&history_ids);
198 206
199 HistoryIdsToScoredMatches(std::move(history_ids), lower_raw_string, 207 HistoryIdsToScoredMatches(std::move(history_ids), lower_raw_string,
200 template_url_service, bookmark_model, 208 template_url_service, bookmark_model,
201 &scored_items); 209 &scored_items);
202 } 210 }
203 // Select and sort only the top |max_matches| results. 211 // Select and sort only the top |max_matches| results.
204 if (scored_items.size() > max_matches) { 212 if (scored_items.size() > max_matches) {
(...skipping 1061 matching lines...) Expand 10 before | Expand all | Expand 10 after
1266 // First cut: typed count, visit count, recency. 1274 // First cut: typed count, visit count, recency.
1267 // TODO(mrossetti): This is too simplistic. Consider an approach which ranks 1275 // TODO(mrossetti): This is too simplistic. Consider an approach which ranks
1268 // recently visited (within the last 12/24 hours) as highly important. Get 1276 // recently visited (within the last 12/24 hours) as highly important. Get
1269 // input from mpearson. 1277 // input from mpearson.
1270 if (r1.typed_count() != r2.typed_count()) 1278 if (r1.typed_count() != r2.typed_count())
1271 return (r1.typed_count() > r2.typed_count()); 1279 return (r1.typed_count() > r2.typed_count());
1272 if (r1.visit_count() != r2.visit_count()) 1280 if (r1.visit_count() != r2.visit_count())
1273 return (r1.visit_count() > r2.visit_count()); 1281 return (r1.visit_count() > r2.visit_count());
1274 return (r1.last_visit() > r2.last_visit()); 1282 return (r1.last_visit() > r2.last_visit());
1275 } 1283 }
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698