OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "components/omnibox/browser/url_index_private_data.h" | 5 #include "components/omnibox/browser/url_index_private_data.h" |
6 | 6 |
7 #include <stdint.h> | 7 #include <stdint.h> |
8 | 8 |
9 #include <functional> | 9 #include <functional> |
10 #include <iterator> | 10 #include <iterator> |
11 #include <limits> | 11 #include <limits> |
12 #include <numeric> | 12 #include <numeric> |
| 13 #include <set> |
13 #include <string> | 14 #include <string> |
14 #include <vector> | 15 #include <vector> |
15 | 16 |
16 #include "base/files/file_util.h" | 17 #include "base/files/file_util.h" |
17 #include "base/i18n/break_iterator.h" | 18 #include "base/i18n/break_iterator.h" |
18 #include "base/i18n/case_conversion.h" | 19 #include "base/i18n/case_conversion.h" |
19 #include "base/macros.h" | 20 #include "base/macros.h" |
20 #include "base/metrics/histogram_macros.h" | 21 #include "base/metrics/histogram_macros.h" |
21 #include "base/strings/string_split.h" | 22 #include "base/strings/string_split.h" |
22 #include "base/strings/string_util.h" | 23 #include "base/strings/string_util.h" |
(...skipping 125 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
148 bookmarks::BookmarkModel* bookmark_model, | 149 bookmarks::BookmarkModel* bookmark_model, |
149 TemplateURLService* template_url_service) { | 150 TemplateURLService* template_url_service) { |
150 // This list will contain the original search string and any other string | 151 // This list will contain the original search string and any other string |
151 // transformations. | 152 // transformations. |
152 String16Vector search_strings; | 153 String16Vector search_strings; |
153 search_strings.push_back(original_search_string); | 154 search_strings.push_back(original_search_string); |
154 if ((cursor_position != base::string16::npos) && | 155 if ((cursor_position != base::string16::npos) && |
155 (cursor_position < original_search_string.length()) && | 156 (cursor_position < original_search_string.length()) && |
156 (cursor_position > 0)) { | 157 (cursor_position > 0)) { |
157 // The original search_string broken at cursor position. This is one type of | 158 // The original search_string broken at cursor position. This is one type of |
158 // transformation. | 159 // transformation. It's possible this transformation doesn't actually |
| 160 // break any words. There's no harm in adding the transformation in this |
| 161 // case because the searching code below prevents running duplicate |
| 162 // searches. |
159 base::string16 transformed_search_string(original_search_string); | 163 base::string16 transformed_search_string(original_search_string); |
160 transformed_search_string.insert(cursor_position, base::ASCIIToUTF16(" ")); | 164 transformed_search_string.insert(cursor_position, base::ASCIIToUTF16(" ")); |
161 search_strings.push_back(transformed_search_string); | 165 search_strings.push_back(transformed_search_string); |
162 } | 166 } |
163 | 167 |
164 ScoredHistoryMatches scored_items; | 168 ScoredHistoryMatches scored_items; |
165 // Invalidate the term cache and return if we have indexed no words (probably | 169 // Invalidate the term cache and return if we have indexed no words (probably |
166 // because we've not been initialized yet). | 170 // because we've not been initialized yet). |
167 if (word_list_.empty()) { | 171 if (word_list_.empty()) { |
168 search_term_cache_.clear(); | 172 search_term_cache_.clear(); |
169 return scored_items; | 173 return scored_items; |
170 } | 174 } |
171 // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep | 175 // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep |
172 // approach. | 176 // approach. |
173 ResetSearchTermCache(); | 177 ResetSearchTermCache(); |
174 | 178 |
175 bool history_ids_were_trimmed = false; | 179 bool history_ids_were_trimmed = false; |
| 180 // A set containing the list of words extracted from each search string, |
| 181 // used to prevent running duplicate searches. |
| 182 std::set<String16Vector> search_string_words; |
176 for (const base::string16& search_string : search_strings) { | 183 for (const base::string16& search_string : search_strings) { |
177 // The search string we receive may contain escaped characters. For reducing | 184 // The search string we receive may contain escaped characters. For reducing |
178 // the index we need individual, lower-cased words, ignoring escapings. For | 185 // the index we need individual, lower-cased words, ignoring escapings. For |
179 // the final filtering we need whitespace separated substrings possibly | 186 // the final filtering we need whitespace separated substrings possibly |
180 // containing escaped characters. | 187 // containing escaped characters. |
181 base::string16 lower_raw_string(base::i18n::ToLower(search_string)); | 188 base::string16 lower_raw_string(base::i18n::ToLower(search_string)); |
182 base::string16 lower_unescaped_string = net::UnescapeURLComponent( | 189 base::string16 lower_unescaped_string = net::UnescapeURLComponent( |
183 lower_raw_string, | 190 lower_raw_string, |
184 net::UnescapeRule::SPACES | net::UnescapeRule::PATH_SEPARATORS | | 191 net::UnescapeRule::SPACES | net::UnescapeRule::PATH_SEPARATORS | |
185 net::UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS); | 192 net::UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS); |
186 | 193 |
187 // Extract individual 'words' (as opposed to 'terms'; see comment in | 194 // Extract individual 'words' (as opposed to 'terms'; see comment in |
188 // HistoryIdsToScoredMatches()) from the search string. When the user types | 195 // HistoryIdsToScoredMatches()) from the search string. When the user types |
189 // "colspec=ID%20Mstone Release" we get four 'words': "colspec", "id", | 196 // "colspec=ID%20Mstone Release" we get four 'words': "colspec", "id", |
190 // "mstone" and "release". | 197 // "mstone" and "release". |
191 String16Vector lower_words( | 198 String16Vector lower_words( |
192 String16VectorFromString16(lower_unescaped_string, false, nullptr)); | 199 String16VectorFromString16(lower_unescaped_string, false, nullptr)); |
193 if (lower_words.empty()) | 200 if (lower_words.empty()) |
194 continue; | 201 continue; |
| 202 // If we've already searched for this list of words, don't do it again. |
| 203 if (search_string_words.find(lower_words) != search_string_words.end()) |
| 204 continue; |
| 205 search_string_words.insert(lower_words); |
195 | 206 |
196 HistoryIDVector history_ids = HistoryIDsFromWords(lower_words); | 207 HistoryIDVector history_ids = HistoryIDsFromWords(lower_words); |
197 history_ids_were_trimmed |= TrimHistoryIdsPool(&history_ids); | 208 history_ids_were_trimmed |= TrimHistoryIdsPool(&history_ids); |
198 | 209 |
199 HistoryIdsToScoredMatches(std::move(history_ids), lower_raw_string, | 210 HistoryIdsToScoredMatches(std::move(history_ids), lower_raw_string, |
200 template_url_service, bookmark_model, | 211 template_url_service, bookmark_model, |
201 &scored_items); | 212 &scored_items); |
202 } | 213 } |
203 // Select and sort only the top |max_matches| results. | 214 // Select and sort only the top |max_matches| results. |
204 if (scored_items.size() > max_matches) { | 215 if (scored_items.size() > max_matches) { |
(...skipping 1061 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1266 // First cut: typed count, visit count, recency. | 1277 // First cut: typed count, visit count, recency. |
1267 // TODO(mrossetti): This is too simplistic. Consider an approach which ranks | 1278 // TODO(mrossetti): This is too simplistic. Consider an approach which ranks |
1268 // recently visited (within the last 12/24 hours) as highly important. Get | 1279 // recently visited (within the last 12/24 hours) as highly important. Get |
1269 // input from mpearson. | 1280 // input from mpearson. |
1270 if (r1.typed_count() != r2.typed_count()) | 1281 if (r1.typed_count() != r2.typed_count()) |
1271 return (r1.typed_count() > r2.typed_count()); | 1282 return (r1.typed_count() > r2.typed_count()); |
1272 if (r1.visit_count() != r2.visit_count()) | 1283 if (r1.visit_count() != r2.visit_count()) |
1273 return (r1.visit_count() > r2.visit_count()); | 1284 return (r1.visit_count() > r2.visit_count()); |
1274 return (r1.last_visit() > r2.last_visit()); | 1285 return (r1.last_visit() > r2.last_visit()); |
1275 } | 1286 } |
OLD | NEW |