components/omnibox/browser/url_index_private_data.cc - Issue 2842513006: Omnibox - HistoryQuick Provider - Don't Run Duplicate Searches

Side by Side Diff: components/omnibox/browser/url_index_private_data.cc

Issue 2842513006: Omnibox - HistoryQuick Provider - Don't Run Duplicate Searches (Closed)

Patch Set: add comment Created 3 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "components/omnibox/browser/url_index_private_data.h"	5 #include "components/omnibox/browser/url_index_private_data.h"

6	6

7 #include <stdint.h>	7 #include <stdint.h>

8	8

9 #include <functional>	9 #include <functional>

10 #include <iterator>	10 #include <iterator>

11 #include <limits>	11 #include <limits>

12 #include <numeric>	12 #include <numeric>

	13 #include <set>

13 #include <string>	14 #include <string>

14 #include <vector>	15 #include <vector>

15	16

16 #include "base/files/file_util.h"	17 #include "base/files/file_util.h"

17 #include "base/i18n/break_iterator.h"	18 #include "base/i18n/break_iterator.h"

18 #include "base/i18n/case_conversion.h"	19 #include "base/i18n/case_conversion.h"

19 #include "base/macros.h"	20 #include "base/macros.h"

20 #include "base/metrics/histogram_macros.h"	21 #include "base/metrics/histogram_macros.h"

21 #include "base/strings/string_split.h"	22 #include "base/strings/string_split.h"

22 #include "base/strings/string_util.h"	23 #include "base/strings/string_util.h"

(...skipping 125 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
148 bookmarks::BookmarkModel* bookmark_model,	149 bookmarks::BookmarkModel* bookmark_model,

149 TemplateURLService* template_url_service) {	150 TemplateURLService* template_url_service) {

150 // This list will contain the original search string and any other string	151 // This list will contain the original search string and any other string

151 // transformations.	152 // transformations.

152 String16Vector search_strings;	153 String16Vector search_strings;

153 search_strings.push_back(original_search_string);	154 search_strings.push_back(original_search_string);

154 if ((cursor_position != base::string16::npos) &&	155 if ((cursor_position != base::string16::npos) &&

155 (cursor_position < original_search_string.length()) &&	156 (cursor_position < original_search_string.length()) &&

156 (cursor_position > 0)) {	157 (cursor_position > 0)) {

157 // The original search_string broken at cursor position. This is one type of	158 // The original search_string broken at cursor position. This is one type of

158 // transformation.	159 // transformation. It's possible this transformation doesn't actually

	160 // break any words. There's no harm in adding the transformation in this

	161 // case because the searching code below prevents running duplicate

	162 // searches.

159 base::string16 transformed_search_string(original_search_string);	163 base::string16 transformed_search_string(original_search_string);

160 transformed_search_string.insert(cursor_position, base::ASCIIToUTF16(" "));	164 transformed_search_string.insert(cursor_position, base::ASCIIToUTF16(" "));

161 search_strings.push_back(transformed_search_string);	165 search_strings.push_back(transformed_search_string);

162 }	166 }

163	167

164 ScoredHistoryMatches scored_items;	168 ScoredHistoryMatches scored_items;

165 // Invalidate the term cache and return if we have indexed no words (probably	169 // Invalidate the term cache and return if we have indexed no words (probably

166 // because we've not been initialized yet).	170 // because we've not been initialized yet).

167 if (word_list_.empty()) {	171 if (word_list_.empty()) {

168 search_term_cache_.clear();	172 search_term_cache_.clear();

169 return scored_items;	173 return scored_items;

170 }	174 }

171 // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep	175 // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep

172 // approach.	176 // approach.

173 ResetSearchTermCache();	177 ResetSearchTermCache();

174	178

175 bool history_ids_were_trimmed = false;	179 bool history_ids_were_trimmed = false;

	180 // A set containing the list of words extracted from each search string,

	181 // used to prevent running duplicate searches.

	182 std::set<String16Vector> search_string_words;

176 for (const base::string16& search_string : search_strings) {	183 for (const base::string16& search_string : search_strings) {

177 // The search string we receive may contain escaped characters. For reducing	184 // The search string we receive may contain escaped characters. For reducing

178 // the index we need individual, lower-cased words, ignoring escapings. For	185 // the index we need individual, lower-cased words, ignoring escapings. For

179 // the final filtering we need whitespace separated substrings possibly	186 // the final filtering we need whitespace separated substrings possibly

180 // containing escaped characters.	187 // containing escaped characters.

181 base::string16 lower_raw_string(base::i18n::ToLower(search_string));	188 base::string16 lower_raw_string(base::i18n::ToLower(search_string));

182 base::string16 lower_unescaped_string = net::UnescapeURLComponent(	189 base::string16 lower_unescaped_string = net::UnescapeURLComponent(

183 lower_raw_string,	190 lower_raw_string,

184 net::UnescapeRule::SPACES \| net::UnescapeRule::PATH_SEPARATORS \|	191 net::UnescapeRule::SPACES \| net::UnescapeRule::PATH_SEPARATORS \|

185 net::UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS);	192 net::UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS);

186	193

187 // Extract individual 'words' (as opposed to 'terms'; see comment in	194 // Extract individual 'words' (as opposed to 'terms'; see comment in

188 // HistoryIdsToScoredMatches()) from the search string. When the user types	195 // HistoryIdsToScoredMatches()) from the search string. When the user types

189 // "colspec=ID%20Mstone Release" we get four 'words': "colspec", "id",	196 // "colspec=ID%20Mstone Release" we get four 'words': "colspec", "id",

190 // "mstone" and "release".	197 // "mstone" and "release".

191 String16Vector lower_words(	198 String16Vector lower_words(

192 String16VectorFromString16(lower_unescaped_string, false, nullptr));	199 String16VectorFromString16(lower_unescaped_string, false, nullptr));

193 if (lower_words.empty())	200 if (lower_words.empty())

194 continue;	201 continue;

	202 // If we've already searched for this list of words, don't do it again.

	203 if (search_string_words.find(lower_words) != search_string_words.end())

	204 continue;

	205 search_string_words.insert(lower_words);

195	206

196 HistoryIDVector history_ids = HistoryIDsFromWords(lower_words);	207 HistoryIDVector history_ids = HistoryIDsFromWords(lower_words);

197 history_ids_were_trimmed \|= TrimHistoryIdsPool(&history_ids);	208 history_ids_were_trimmed \|= TrimHistoryIdsPool(&history_ids);

198	209

199 HistoryIdsToScoredMatches(std::move(history_ids), lower_raw_string,	210 HistoryIdsToScoredMatches(std::move(history_ids), lower_raw_string,

200 template_url_service, bookmark_model,	211 template_url_service, bookmark_model,

201 &scored_items);	212 &scored_items);

202 }	213 }

203 // Select and sort only the top \|max_matches\| results.	214 // Select and sort only the top \|max_matches\| results.

204 if (scored_items.size() > max_matches) {	215 if (scored_items.size() > max_matches) {

(...skipping 1061 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1266 // First cut: typed count, visit count, recency.	1277 // First cut: typed count, visit count, recency.

1267 // TODO(mrossetti): This is too simplistic. Consider an approach which ranks	1278 // TODO(mrossetti): This is too simplistic. Consider an approach which ranks

1268 // recently visited (within the last 12/24 hours) as highly important. Get	1279 // recently visited (within the last 12/24 hours) as highly important. Get

1269 // input from mpearson.	1280 // input from mpearson.

1270 if (r1.typed_count() != r2.typed_count())	1281 if (r1.typed_count() != r2.typed_count())

1271 return (r1.typed_count() > r2.typed_count());	1282 return (r1.typed_count() > r2.typed_count());

1272 if (r1.visit_count() != r2.visit_count())	1283 if (r1.visit_count() != r2.visit_count())

1273 return (r1.visit_count() > r2.visit_count());	1284 return (r1.visit_count() > r2.visit_count());

1274 return (r1.last_visit() > r2.last_visit());	1285 return (r1.last_visit() > r2.last_visit());

1275 }	1286 }

OLD	NEW

« no previous file with comments | « no previous file | no next file » | no next file with comments »