Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(581)

Side by Side Diff: components/omnibox/browser/url_index_private_data.cc

Issue 2702413007: Cleaning up test only variables. (Closed)
Patch Set: Created 3 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/omnibox/browser/url_index_private_data.h" 5 #include "components/omnibox/browser/url_index_private_data.h"
6 6
7 #include <stdint.h> 7 #include <stdint.h>
8 8
9 #include <functional> 9 #include <functional>
10 #include <iterator> 10 #include <iterator>
(...skipping 123 matching lines...) Expand 10 before | Expand all | Expand 10 after
134 } 134 }
135 135
136 136
137 // URLIndexPrivateData --------------------------------------------------------- 137 // URLIndexPrivateData ---------------------------------------------------------
138 138
139 // static 139 // static
140 constexpr size_t URLIndexPrivateData::kMaxVisitsToStoreInCache; 140 constexpr size_t URLIndexPrivateData::kMaxVisitsToStoreInCache;
141 141
142 URLIndexPrivateData::URLIndexPrivateData() 142 URLIndexPrivateData::URLIndexPrivateData()
143 : restored_cache_version_(0), 143 : restored_cache_version_(0),
144 saved_cache_version_(kCurrentCacheFileVersion), 144 saved_cache_version_(kCurrentCacheFileVersion) {}
145 pre_filter_item_count_(0),
146 post_filter_item_count_(0),
147 post_scoring_item_count_(0) {
148 }
149 145
150 ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms( 146 ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms(
151 base::string16 original_search_string, 147 base::string16 original_search_string,
152 size_t cursor_position, 148 size_t cursor_position,
153 size_t max_matches, 149 size_t max_matches,
154 bookmarks::BookmarkModel* bookmark_model, 150 bookmarks::BookmarkModel* bookmark_model,
155 TemplateURLService* template_url_service) { 151 TemplateURLService* template_url_service) {
156 pre_filter_item_count_ = 0;
157 post_filter_item_count_ = 0;
158 post_scoring_item_count_ = 0;
159
160 // This list will contain the original search string and any other string 152 // This list will contain the original search string and any other string
161 // transformations. 153 // transformations.
162 String16Vector search_strings; 154 String16Vector search_strings;
163 search_strings.push_back(original_search_string); 155 search_strings.push_back(original_search_string);
164 if ((cursor_position != base::string16::npos) && 156 if ((cursor_position != base::string16::npos) &&
165 (cursor_position < original_search_string.length()) && 157 (cursor_position < original_search_string.length()) &&
166 (cursor_position > 0)) { 158 (cursor_position > 0)) {
167 // The original search_string broken at cursor position. This is one type of 159 // The original search_string broken at cursor position. This is one type of
168 // transformation. 160 // transformation.
169 base::string16 transformed_search_string(original_search_string); 161 base::string16 transformed_search_string(original_search_string);
170 transformed_search_string.insert(cursor_position, base::ASCIIToUTF16(" ")); 162 transformed_search_string.insert(cursor_position, base::ASCIIToUTF16(" "));
171 search_strings.push_back(transformed_search_string); 163 search_strings.push_back(transformed_search_string);
172 } 164 }
173 165
174 ScoredHistoryMatches scored_items; 166 ScoredHistoryMatches scored_items;
175 // Invalidate the term cache and return if we have indexed no words (probably 167 // Invalidate the term cache and return if we have indexed no words (probably
176 // because we've not been initialized yet). 168 // because we've not been initialized yet).
177 if (word_list_.empty()) { 169 if (word_list_.empty()) {
178 search_term_cache_.clear(); 170 search_term_cache_.clear();
179 return scored_items; 171 return scored_items;
180 } 172 }
181 // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep 173 // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep
182 // approach. 174 // approach.
183 ResetSearchTermCache(); 175 ResetSearchTermCache();
184 176
177 bool history_ids_were_trimmed = false;
185 for (const base::string16& search_string : search_strings) { 178 for (const base::string16& search_string : search_strings) {
186 // The search string we receive may contain escaped characters. For reducing 179 // The search string we receive may contain escaped characters. For reducing
187 // the index we need individual, lower-cased words, ignoring escapings. For 180 // the index we need individual, lower-cased words, ignoring escapings. For
188 // the final filtering we need whitespace separated substrings possibly 181 // the final filtering we need whitespace separated substrings possibly
189 // containing escaped characters. 182 // containing escaped characters.
190 base::string16 lower_raw_string(base::i18n::ToLower(search_string)); 183 base::string16 lower_raw_string(base::i18n::ToLower(search_string));
191 base::string16 lower_unescaped_string = net::UnescapeURLComponent( 184 base::string16 lower_unescaped_string = net::UnescapeURLComponent(
192 lower_raw_string, 185 lower_raw_string,
193 net::UnescapeRule::SPACES | net::UnescapeRule::PATH_SEPARATORS | 186 net::UnescapeRule::SPACES | net::UnescapeRule::PATH_SEPARATORS |
194 net::UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS); 187 net::UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS);
195 188
196 // Extract individual 'words' (as opposed to 'terms'; see comment in 189 // Extract individual 'words' (as opposed to 'terms'; see comment in
197 // HistoryIdSetToScoredMatches()) from the search string. When the user 190 // HistoryIdSetToScoredMatches()) from the search string. When the user
198 // types "colspec=ID%20Mstone Release" we get four 'words': "colspec", "id", 191 // types "colspec=ID%20Mstone Release" we get four 'words': "colspec", "id",
199 // "mstone" and "release". 192 // "mstone" and "release".
200 String16Vector lower_words( 193 String16Vector lower_words(
201 String16VectorFromString16(lower_unescaped_string, false, nullptr)); 194 String16VectorFromString16(lower_unescaped_string, false, nullptr));
202 if (lower_words.empty()) 195 if (lower_words.empty())
203 continue; 196 continue;
204 HistoryIDSet history_id_set = HistoryIDSetFromWords(lower_words); 197 HistoryIDSet history_id_set = HistoryIDSetFromWords(lower_words);
205 pre_filter_item_count_ += history_id_set.size(); 198 history_ids_were_trimmed =
Peter Kasting 2017/02/23 01:04:45 Nit: Use |=
dyaroshev 2017/02/23 01:41:26 This would be a bit operation - is it ok? I think
Peter Kasting 2017/02/23 01:46:06 As you note, A |= B is equivalent to A = A | B. B
dyaroshev 2017/02/24 01:27:03 Done.
206 // Trim the candidate pool if it is large. Note that we do not filter out 199 TrimHistoryIdsPool(&history_id_set) || history_ids_were_trimmed;
207 // items that do not contain the search terms as proper substrings -- 200
208 // doing so is the performance-costly operation we are trying to avoid in
209 // order to maintain omnibox responsiveness.
210 const size_t kItemsToScoreLimit = 500;
211 if (history_id_set.size() > kItemsToScoreLimit) {
212 HistoryIDVector history_ids;
213 std::copy(history_id_set.begin(), history_id_set.end(),
214 std::back_inserter(history_ids));
215 // Trim down the set by sorting by typed-count, visit-count, and last
216 // visit.
217 HistoryItemFactorGreater item_factor_functor(history_info_map_);
218 std::partial_sort(history_ids.begin(),
219 history_ids.begin() + kItemsToScoreLimit,
220 history_ids.end(), item_factor_functor);
221 history_id_set.clear();
222 std::copy(history_ids.begin(), history_ids.begin() + kItemsToScoreLimit,
223 std::inserter(history_id_set, history_id_set.end()));
224 post_filter_item_count_ += history_id_set.size();
225 } else {
226 post_filter_item_count_ += pre_filter_item_count_;
227 }
228 ScoredHistoryMatches temp_scored_items; 201 ScoredHistoryMatches temp_scored_items;
229 HistoryIdSetToScoredMatches(history_id_set, lower_raw_string, 202 HistoryIdSetToScoredMatches(history_id_set, lower_raw_string,
230 template_url_service, bookmark_model, 203 template_url_service, bookmark_model,
231 &temp_scored_items); 204 &temp_scored_items);
232 scored_items.insert(scored_items.end(), temp_scored_items.begin(), 205 scored_items.insert(scored_items.end(), temp_scored_items.begin(),
233 temp_scored_items.end()); 206 temp_scored_items.end());
234 } 207 }
235 // Select and sort only the top |max_matches| results. 208 // Select and sort only the top |max_matches| results.
236 if (scored_items.size() > max_matches) { 209 if (scored_items.size() > max_matches) {
237 std::partial_sort(scored_items.begin(), scored_items.begin() + max_matches, 210 std::partial_sort(scored_items.begin(), scored_items.begin() + max_matches,
238 scored_items.end(), 211 scored_items.end(),
239 ScoredHistoryMatch::MatchScoreGreater); 212 ScoredHistoryMatch::MatchScoreGreater);
240 scored_items.resize(max_matches); 213 scored_items.resize(max_matches);
241 } else { 214 } else {
242 std::sort(scored_items.begin(), scored_items.end(), 215 std::sort(scored_items.begin(), scored_items.end(),
243 ScoredHistoryMatch::MatchScoreGreater); 216 ScoredHistoryMatch::MatchScoreGreater);
244 } 217 }
245 post_scoring_item_count_ = scored_items.size(); 218 if (history_ids_were_trimmed) {
246 if (pre_filter_item_count_ > post_filter_item_count_) {
247 // If we trim the results set we do not want to cache the results for next 219 // If we trim the results set we do not want to cache the results for next
248 // time as the user's ultimately desired result could easily be eliminated 220 // time as the user's ultimately desired result could easily be eliminated
249 // in the early rough filter. 221 // in the early rough filter.
250 search_term_cache_.clear(); 222 search_term_cache_.clear();
251 } else { 223 } else {
252 // Remove any stale SearchTermCacheItems. 224 // Remove any stale SearchTermCacheItems.
253 for (SearchTermCacheMap::iterator cache_iter = search_term_cache_.begin(); 225 for (SearchTermCacheMap::iterator cache_iter = search_term_cache_.begin();
254 cache_iter != search_term_cache_.end(); ) { 226 cache_iter != search_term_cache_.end(); ) {
255 if (!cache_iter->second.used_) 227 if (!cache_iter->second.used_)
256 cache_iter = search_term_cache_.erase(cache_iter); 228 cache_iter = search_term_cache_.erase(cache_iter);
(...skipping 208 matching lines...) Expand 10 before | Expand all | Expand 10 after
465 data_copy->available_words_ = available_words_; 437 data_copy->available_words_ = available_words_;
466 data_copy->word_map_ = word_map_; 438 data_copy->word_map_ = word_map_;
467 data_copy->char_word_map_ = char_word_map_; 439 data_copy->char_word_map_ = char_word_map_;
468 data_copy->word_id_history_map_ = word_id_history_map_; 440 data_copy->word_id_history_map_ = word_id_history_map_;
469 data_copy->history_id_word_map_ = history_id_word_map_; 441 data_copy->history_id_word_map_ = history_id_word_map_;
470 data_copy->history_info_map_ = history_info_map_; 442 data_copy->history_info_map_ = history_info_map_;
471 data_copy->word_starts_map_ = word_starts_map_; 443 data_copy->word_starts_map_ = word_starts_map_;
472 return data_copy; 444 return data_copy;
473 // Not copied: 445 // Not copied:
474 // search_term_cache_ 446 // search_term_cache_
475 // pre_filter_item_count_
476 // post_filter_item_count_
477 // post_scoring_item_count_
478 } 447 }
479 448
480 bool URLIndexPrivateData::Empty() const { 449 bool URLIndexPrivateData::Empty() const {
481 return history_info_map_.empty(); 450 return history_info_map_.empty();
482 } 451 }
483 452
484 void URLIndexPrivateData::Clear() { 453 void URLIndexPrivateData::Clear() {
485 last_time_rebuilt_from_history_ = base::Time(); 454 last_time_rebuilt_from_history_ = base::Time();
486 word_list_.clear(); 455 word_list_.clear();
487 available_words_.clear(); 456 available_words_.clear();
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
521 history_id_set.swap(term_history_set); 490 history_id_set.swap(term_history_set);
522 } else { 491 } else {
523 HistoryIDSet new_history_id_set = base::STLSetIntersection<HistoryIDSet>( 492 HistoryIDSet new_history_id_set = base::STLSetIntersection<HistoryIDSet>(
524 history_id_set, term_history_set); 493 history_id_set, term_history_set);
525 history_id_set.swap(new_history_id_set); 494 history_id_set.swap(new_history_id_set);
526 } 495 }
527 } 496 }
528 return history_id_set; 497 return history_id_set;
529 } 498 }
530 499
500 bool URLIndexPrivateData::TrimHistoryIdsPool(
501 HistoryIDSet* history_id_set) const {
502 constexpr size_t kItemsToScoreLimit = 500;
503 if (history_id_set->size() < kItemsToScoreLimit)
504 return false;
505
506 HistoryIDVector history_ids(history_id_set->begin(), history_id_set->end());
507
508 // Trim down the set by sorting by typed-count, visit-count, and last
509 // visit.
510 auto new_end = history_ids.begin() + kItemsToScoreLimit;
511 HistoryItemFactorGreater item_factor_functor(history_info_map_);
512
513 std::nth_element(history_ids.begin(), new_end, history_ids.end(),
514 item_factor_functor);
515 history_ids.erase(new_end, history_ids.end());
516
517 *history_id_set = {history_ids.begin(), history_ids.end()};
518 return true;
519 }
520
531 HistoryIDSet URLIndexPrivateData::HistoryIDsForTerm( 521 HistoryIDSet URLIndexPrivateData::HistoryIDsForTerm(
532 const base::string16& term) { 522 const base::string16& term) {
533 if (term.empty()) 523 if (term.empty())
534 return HistoryIDSet(); 524 return HistoryIDSet();
535 525
536 // TODO(mrossetti): Consider optimizing for very common terms such as 526 // TODO(mrossetti): Consider optimizing for very common terms such as
537 // 'http[s]', 'www', 'com', etc. Or collect the top 100 more frequently 527 // 'http[s]', 'www', 'com', etc. Or collect the top 100 more frequently
538 // occuring words in the user's searches. 528 // occuring words in the user's searches.
539 529
540 size_t term_length = term.length(); 530 size_t term_length = term.length();
(...skipping 840 matching lines...) Expand 10 before | Expand all | Expand 10 after
1381 // First cut: typed count, visit count, recency. 1371 // First cut: typed count, visit count, recency.
1382 // TODO(mrossetti): This is too simplistic. Consider an approach which ranks 1372 // TODO(mrossetti): This is too simplistic. Consider an approach which ranks
1383 // recently visited (within the last 12/24 hours) as highly important. Get 1373 // recently visited (within the last 12/24 hours) as highly important. Get
1384 // input from mpearson. 1374 // input from mpearson.
1385 if (r1.typed_count() != r2.typed_count()) 1375 if (r1.typed_count() != r2.typed_count())
1386 return (r1.typed_count() > r2.typed_count()); 1376 return (r1.typed_count() > r2.typed_count());
1387 if (r1.visit_count() != r2.visit_count()) 1377 if (r1.visit_count() != r2.visit_count())
1388 return (r1.visit_count() > r2.visit_count()); 1378 return (r1.visit_count() > r2.visit_count());
1389 return (r1.last_visit() > r2.last_visit()); 1379 return (r1.last_visit() > r2.last_visit());
1390 } 1380 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698