OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "components/omnibox/browser/url_index_private_data.h" | 5 #include "components/omnibox/browser/url_index_private_data.h" |
6 | 6 |
7 #include <stdint.h> | 7 #include <stdint.h> |
8 | 8 |
9 #include <functional> | 9 #include <functional> |
10 #include <iterator> | 10 #include <iterator> |
11 #include <limits> | 11 #include <limits> |
12 #include <numeric> | 12 #include <numeric> |
13 #include <set> | |
13 #include <string> | 14 #include <string> |
14 #include <vector> | 15 #include <vector> |
15 | 16 |
16 #include "base/files/file_util.h" | 17 #include "base/files/file_util.h" |
17 #include "base/i18n/break_iterator.h" | 18 #include "base/i18n/break_iterator.h" |
18 #include "base/i18n/case_conversion.h" | 19 #include "base/i18n/case_conversion.h" |
19 #include "base/macros.h" | 20 #include "base/macros.h" |
20 #include "base/metrics/histogram_macros.h" | 21 #include "base/metrics/histogram_macros.h" |
21 #include "base/strings/string_split.h" | 22 #include "base/strings/string_split.h" |
22 #include "base/strings/string_util.h" | 23 #include "base/strings/string_util.h" |
(...skipping 118 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
141 : restored_cache_version_(0), | 142 : restored_cache_version_(0), |
142 saved_cache_version_(kCurrentCacheFileVersion) {} | 143 saved_cache_version_(kCurrentCacheFileVersion) {} |
143 | 144 |
144 ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms( | 145 ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms( |
145 base::string16 original_search_string, | 146 base::string16 original_search_string, |
146 size_t cursor_position, | 147 size_t cursor_position, |
147 size_t max_matches, | 148 size_t max_matches, |
148 bookmarks::BookmarkModel* bookmark_model, | 149 bookmarks::BookmarkModel* bookmark_model, |
149 TemplateURLService* template_url_service) { | 150 TemplateURLService* template_url_service) { |
150 // This list will contain the original search string and any other string | 151 // This list will contain the original search string and any other string |
151 // transformations. | 152 // transformations. |
Peter Kasting
2017/05/05 23:07:34
This comment sounds as if we envision many differe
Mark P
2017/05/08 22:46:19
One obvious one that comes to mind is rewriting 's
| |
152 String16Vector search_strings; | 153 String16Vector search_strings; |
153 search_strings.push_back(original_search_string); | 154 search_strings.push_back(original_search_string); |
154 if ((cursor_position != base::string16::npos) && | 155 if ((cursor_position != base::string16::npos) && |
155 (cursor_position < original_search_string.length()) && | 156 (cursor_position < original_search_string.length()) && |
156 (cursor_position > 0)) { | 157 (cursor_position > 0)) { |
157 // The original search_string broken at cursor position. This is one type of | 158 // The original search_string broken at cursor position. This is one type of |
158 // transformation. | 159 // transformation. |
Peter Kasting
2017/05/05 23:07:34
Nit: It might be nice, either here or below, to co
Mark P
2017/05/08 22:46:19
Done.
| |
159 base::string16 transformed_search_string(original_search_string); | 160 base::string16 transformed_search_string(original_search_string); |
160 transformed_search_string.insert(cursor_position, base::ASCIIToUTF16(" ")); | 161 transformed_search_string.insert(cursor_position, base::ASCIIToUTF16(" ")); |
161 search_strings.push_back(transformed_search_string); | 162 search_strings.push_back(transformed_search_string); |
162 } | 163 } |
163 | 164 |
164 ScoredHistoryMatches scored_items; | 165 ScoredHistoryMatches scored_items; |
165 // Invalidate the term cache and return if we have indexed no words (probably | 166 // Invalidate the term cache and return if we have indexed no words (probably |
166 // because we've not been initialized yet). | 167 // because we've not been initialized yet). |
167 if (word_list_.empty()) { | 168 if (word_list_.empty()) { |
168 search_term_cache_.clear(); | 169 search_term_cache_.clear(); |
169 return scored_items; | 170 return scored_items; |
170 } | 171 } |
171 // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep | 172 // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep |
172 // approach. | 173 // approach. |
173 ResetSearchTermCache(); | 174 ResetSearchTermCache(); |
174 | 175 |
175 bool history_ids_were_trimmed = false; | 176 bool history_ids_were_trimmed = false; |
177 // A set containing the list of words extracted from each search string, | |
178 // used to prevent running duplicate searches. | |
179 std::set<String16Vector> search_string_words; | |
176 for (const base::string16& search_string : search_strings) { | 180 for (const base::string16& search_string : search_strings) { |
177 // The search string we receive may contain escaped characters. For reducing | 181 // The search string we receive may contain escaped characters. For reducing |
178 // the index we need individual, lower-cased words, ignoring escapings. For | 182 // the index we need individual, lower-cased words, ignoring escapings. For |
179 // the final filtering we need whitespace separated substrings possibly | 183 // the final filtering we need whitespace separated substrings possibly |
180 // containing escaped characters. | 184 // containing escaped characters. |
181 base::string16 lower_raw_string(base::i18n::ToLower(search_string)); | 185 base::string16 lower_raw_string(base::i18n::ToLower(search_string)); |
182 base::string16 lower_unescaped_string = net::UnescapeURLComponent( | 186 base::string16 lower_unescaped_string = net::UnescapeURLComponent( |
183 lower_raw_string, | 187 lower_raw_string, |
184 net::UnescapeRule::SPACES | net::UnescapeRule::PATH_SEPARATORS | | 188 net::UnescapeRule::SPACES | net::UnescapeRule::PATH_SEPARATORS | |
185 net::UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS); | 189 net::UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS); |
186 | 190 |
187 // Extract individual 'words' (as opposed to 'terms'; see comment in | 191 // Extract individual 'words' (as opposed to 'terms'; see comment in |
188 // HistoryIdsToScoredMatches()) from the search string. When the user types | 192 // HistoryIdsToScoredMatches()) from the search string. When the user types |
189 // "colspec=ID%20Mstone Release" we get four 'words': "colspec", "id", | 193 // "colspec=ID%20Mstone Release" we get four 'words': "colspec", "id", |
190 // "mstone" and "release". | 194 // "mstone" and "release". |
191 String16Vector lower_words( | 195 String16Vector lower_words( |
192 String16VectorFromString16(lower_unescaped_string, false, nullptr)); | 196 String16VectorFromString16(lower_unescaped_string, false, nullptr)); |
193 if (lower_words.empty()) | 197 if (lower_words.empty()) |
194 continue; | 198 continue; |
199 // If we've already searched for this list of words, don't do it again. | |
200 if (search_string_words.find(lower_words) != search_string_words.end()) | |
Peter Kasting
2017/05/05 23:07:34
Nit: Or base::ContainsKey(search_string_words, low
Mark P
2017/05/08 22:46:19
Left as-is. I find the current code a bit more re
| |
201 continue; | |
202 search_string_words.insert(lower_words); | |
195 | 203 |
196 HistoryIDVector history_ids = HistoryIDsFromWords(lower_words); | 204 HistoryIDVector history_ids = HistoryIDsFromWords(lower_words); |
197 history_ids_were_trimmed |= TrimHistoryIdsPool(&history_ids); | 205 history_ids_were_trimmed |= TrimHistoryIdsPool(&history_ids); |
198 | 206 |
199 HistoryIdsToScoredMatches(std::move(history_ids), lower_raw_string, | 207 HistoryIdsToScoredMatches(std::move(history_ids), lower_raw_string, |
200 template_url_service, bookmark_model, | 208 template_url_service, bookmark_model, |
201 &scored_items); | 209 &scored_items); |
202 } | 210 } |
203 // Select and sort only the top |max_matches| results. | 211 // Select and sort only the top |max_matches| results. |
204 if (scored_items.size() > max_matches) { | 212 if (scored_items.size() > max_matches) { |
(...skipping 1061 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1266 // First cut: typed count, visit count, recency. | 1274 // First cut: typed count, visit count, recency. |
1267 // TODO(mrossetti): This is too simplistic. Consider an approach which ranks | 1275 // TODO(mrossetti): This is too simplistic. Consider an approach which ranks |
1268 // recently visited (within the last 12/24 hours) as highly important. Get | 1276 // recently visited (within the last 12/24 hours) as highly important. Get |
1269 // input from mpearson. | 1277 // input from mpearson. |
1270 if (r1.typed_count() != r2.typed_count()) | 1278 if (r1.typed_count() != r2.typed_count()) |
1271 return (r1.typed_count() > r2.typed_count()); | 1279 return (r1.typed_count() > r2.typed_count()); |
1272 if (r1.visit_count() != r2.visit_count()) | 1280 if (r1.visit_count() != r2.visit_count()) |
1273 return (r1.visit_count() > r2.visit_count()); | 1281 return (r1.visit_count() > r2.visit_count()); |
1274 return (r1.last_visit() > r2.last_visit()); | 1282 return (r1.last_visit() > r2.last_visit()); |
1275 } | 1283 } |
OLD | NEW |