Chromium Code Reviews

Side by Side Diff: components/omnibox/browser/url_index_private_data.cc

Issue 2690303012: Cleaning up url_index_private_data and in_memory_url_index_types. (Closed)
Patch Set: Removing sorting, utility for sets intersection. Created 3 years, 10 months ago
OLD | NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/omnibox/browser/url_index_private_data.h" 5 #include "components/omnibox/browser/url_index_private_data.h"
6 6
7 #include <stdint.h> 7 #include <stdint.h>
8 8
9 #include <functional> 9 #include <functional>
10 #include <iterator> 10 #include <iterator>
(...skipping 19 matching lines...)
30 #include "components/omnibox/browser/in_memory_url_index.h" 30 #include "components/omnibox/browser/in_memory_url_index.h"
31 #include "components/search_engines/template_url_service.h" 31 #include "components/search_engines/template_url_service.h"
32 #include "components/url_formatter/url_formatter.h" 32 #include "components/url_formatter/url_formatter.h"
33 33
34 #if defined(USE_SYSTEM_PROTOBUF) 34 #if defined(USE_SYSTEM_PROTOBUF)
35 #include <google/protobuf/repeated_field.h> 35 #include <google/protobuf/repeated_field.h>
36 #else 36 #else
37 #include "third_party/protobuf/src/google/protobuf/repeated_field.h" 37 #include "third_party/protobuf/src/google/protobuf/repeated_field.h"
38 #endif 38 #endif
39 39
40 namespace {
41
40 using google::protobuf::RepeatedField; 42 using google::protobuf::RepeatedField;
41 using google::protobuf::RepeatedPtrField; 43 using google::protobuf::RepeatedPtrField;
42 using in_memory_url_index::InMemoryURLIndexCacheItem; 44 using in_memory_url_index::InMemoryURLIndexCacheItem;
43 45
44 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordListItem 46 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordListItem
45 WordListItem; 47 WordListItem;
46 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordMapItem_WordMapEntry 48 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordMapItem_WordMapEntry
47 WordMapEntry; 49 WordMapEntry;
48 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordMapItem WordMapItem; 50 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordMapItem WordMapItem;
49 typedef in_memory_url_index::InMemoryURLIndexCacheItem_CharWordMapItem 51 typedef in_memory_url_index::InMemoryURLIndexCacheItem_CharWordMapItem
(...skipping 20 matching lines...)
70 WordStartsMapEntry; 72 WordStartsMapEntry;
71 73
72 // Algorithm Functions --------------------------------------------------------- 74 // Algorithm Functions ---------------------------------------------------------
73 75
74 // Comparison function for sorting search terms by descending length. 76 // Comparison function for sorting search terms by descending length.
75 bool LengthGreater(const base::string16& string_a, 77 bool LengthGreater(const base::string16& string_a,
76 const base::string16& string_b) { 78 const base::string16& string_b) {
77 return string_a.length() > string_b.length(); 79 return string_a.length() > string_b.length();
78 } 80 }
79 81
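The comparator above feeds the std::sort call in HistoryIDsFromWords() further down; a minimal std-only sketch of the same "longest words first" ordering written as a lambda (std::u16string and the helper name stand in for the CL's base::string16 code):

#include <algorithm>
#include <string>
#include <vector>

// Sort words so the longest come first; expensive single-character words
// end up at the back and are processed last.
void SortLongestFirst(std::vector<std::u16string>* words) {
  std::sort(words->begin(), words->end(),
            [](const std::u16string& a, const std::u16string& b) {
              return a.length() > b.length();
            });
}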
82 template <typename ResultingSet, typename I, typename Transformation>
Peter Kasting 2017/02/18 01:46:31 Nit: I -> Iter?
dyaroshev 2017/02/18 11:48:13 Done.
83 // Requires ResultingSet is a Container, I is an InputIterator, Transformation
Peter Kasting 2017/02/18 01:46:31 Nit: template<...> is part of the function declaration ...
dyaroshev 2017/02/18 11:48:14 This comment describes concepts. Used concepts here ...
Peter Kasting 2017/02/20 10:02:54 You can leave them if you want, but don't insert t
84 // is an UnaryFunction on ValueType<I> that returns a Container.
85 ResultingSet IntersectSets(I first, I last, Transformation get_set) {
Peter Kasting 2017/02/18 01:46:31 Nit: GetTransformIntersection()? IntersectConstru
dyaroshev 2017/02/18 11:48:13 Done.
86 if (first == last)
87 return ResultingSet();
88
89 auto tmp_set = get_set(*first);
90 ResultingSet res(tmp_set.begin(), tmp_set.end());
dyaroshev 2017/02/18 01:22:14 If we keep this, I should write something like ContainerCast ...
dyaroshev 2017/02/18 11:48:14 Done. @pkasting: there are a number of places here w...
Peter Kasting 2017/02/20 10:02:54 I find things more readable without ContainerCast.
91
92 // Like std::accumulate but short circuits if set is empty.
93 for (++first; first != last && !res.empty(); ++first) {
94 auto tmp_set = get_set(*first);
95 if (tmp_set.empty())
96 break;
Peter Kasting 2017/02/18 01:46:31 Shouldn't this return the empty set instead of breaking?
dyaroshev 2017/02/18 11:48:14 Done. One of the advantages of keeping algorithms
Peter Kasting 2017/02/20 10:02:54 If there's really a need to test them, they can be
97
98 res = base::STLSetIntersection<ResultingSet>(res, tmp_set);
99 }
100
101 return res;
102 }
103
104 } // namespace
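A minimal std-only sketch of the intersect-with-transform helper above, assuming sorted containers so std::set_intersection can stand in for base::STLSetIntersection (the fixed std::set<int> result type is an illustrative simplification of the templated ResultingSet):

#include <algorithm>
#include <iterator>
#include <set>

// Intersects the sets produced by applying |get_set| to each element of
// [first, last). Stops early and returns an empty set as soon as any
// intermediate intersection is empty.
template <typename Iter, typename GetSet>
std::set<int> IntersectTransformed(Iter first, Iter last, GetSet get_set) {
  if (first == last)
    return {};
  std::set<int> result = get_set(*first);
  for (++first; first != last && !result.empty(); ++first) {
    std::set<int> next = get_set(*first);
    std::set<int> intersection;
    std::set_intersection(result.begin(), result.end(), next.begin(),
                          next.end(),
                          std::inserter(intersection, intersection.end()));
    result.swap(intersection);
  }
  return result;
}

This shape also reflects Peter Kasting's point above: an empty per-element set simply produces an empty intersection on the next pass, so the function ends up returning the empty set rather than needing a separate break-vs-return decision.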
80 105
81 // UpdateRecentVisitsFromHistoryDBTask ----------------------------------------- 106 // UpdateRecentVisitsFromHistoryDBTask -----------------------------------------
82 107
83 // HistoryDBTask used to update the recent visit data for a particular 108 // HistoryDBTask used to update the recent visit data for a particular
84 // row from the history database. 109 // row from the history database.
85 class UpdateRecentVisitsFromHistoryDBTask : public history::HistoryDBTask { 110 class UpdateRecentVisitsFromHistoryDBTask : public history::HistoryDBTask {
86 public: 111 public:
87 explicit UpdateRecentVisitsFromHistoryDBTask( 112 explicit UpdateRecentVisitsFromHistoryDBTask(
88 URLIndexPrivateData* private_data, 113 URLIndexPrivateData* private_data,
89 history::URLID url_id); 114 history::URLID url_id);
(...skipping 97 matching lines...)
187 // the index we need individual, lower-cased words, ignoring escapings. For 212 // the index we need individual, lower-cased words, ignoring escapings. For
188 // the final filtering we need whitespace separated substrings possibly 213 // the final filtering we need whitespace separated substrings possibly
189 // containing escaped characters. 214 // containing escaped characters.
190 base::string16 lower_raw_string(base::i18n::ToLower(search_string)); 215 base::string16 lower_raw_string(base::i18n::ToLower(search_string));
191 base::string16 lower_unescaped_string = net::UnescapeURLComponent( 216 base::string16 lower_unescaped_string = net::UnescapeURLComponent(
192 lower_raw_string, 217 lower_raw_string,
193 net::UnescapeRule::SPACES | net::UnescapeRule::PATH_SEPARATORS | 218 net::UnescapeRule::SPACES | net::UnescapeRule::PATH_SEPARATORS |
194 net::UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS); 219 net::UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS);
195 220
196 // Extract individual 'words' (as opposed to 'terms'; see comment in 221 // Extract individual 'words' (as opposed to 'terms'; see comment in
197 // HistoryIdSetToScoredMatches()) from the search string. When the user 222 // HistoryIdsToScoredMatches()) from the search string. When the user
198 // types "colspec=ID%20Mstone Release" we get four 'words': "colspec", "id", 223 // types "colspec=ID%20Mstone Release" we get four 'words': "colspec", "id",
199 // "mstone" and "release". 224 // "mstone" and "release".
200 String16Vector lower_words( 225 String16Vector lower_words(
201 String16VectorFromString16(lower_unescaped_string, false, nullptr)); 226 String16VectorFromString16(lower_unescaped_string, false, nullptr));
202 if (lower_words.empty()) 227 if (lower_words.empty())
203 continue; 228 continue;
204 HistoryIDSet history_id_set = HistoryIDSetFromWords(lower_words); 229
205 pre_filter_item_count_ += history_id_set.size(); 230 HistoryIDVector history_ids = HistoryIDsFromWords(lower_words);
206 // Trim the candidate pool if it is large. Note that we do not filter out 231
207 // items that do not contain the search terms as proper substrings -- 232 pre_filter_item_count_ += history_ids.size();
208 // doing so is the performance-costly operation we are trying to avoid in 233 TrimHistoryIdsPool(&history_ids);
209 // order to maintain omnibox responsiveness. 234 post_filter_item_count_ += history_ids.size();
Peter Kasting 2017/02/18 01:46:31 This line seems like a functional change (it was a
dyaroshev 2017/02/18 11:48:14 Moved this logic into TrimHistoryIdsPool and reverted ...
Peter Kasting 2017/02/20 10:02:54 Ugh, don't do that. Now the code is as wrong as before...
dyaroshev 2017/02/20 22:21:31 Other than creating a separate CL with this fix se
dyaroshev 2017/02/22 22:57:25 New CL: https://codereview.chromium.org/2702413007
210 const size_t kItemsToScoreLimit = 500; 235
211 if (history_id_set.size() > kItemsToScoreLimit) {
212 HistoryIDVector history_ids;
213 std::copy(history_id_set.begin(), history_id_set.end(),
214 std::back_inserter(history_ids));
215 // Trim down the set by sorting by typed-count, visit-count, and last
216 // visit.
217 HistoryItemFactorGreater item_factor_functor(history_info_map_);
218 std::partial_sort(history_ids.begin(),
219 history_ids.begin() + kItemsToScoreLimit,
220 history_ids.end(), item_factor_functor);
221 history_id_set.clear();
222 std::copy(history_ids.begin(), history_ids.begin() + kItemsToScoreLimit,
223 std::inserter(history_id_set, history_id_set.end()));
224 post_filter_item_count_ += history_id_set.size();
225 } else {
226 post_filter_item_count_ += pre_filter_item_count_;
227 }
228 ScoredHistoryMatches temp_scored_items; 236 ScoredHistoryMatches temp_scored_items;
229 HistoryIdSetToScoredMatches(history_id_set, lower_raw_string, 237 HistoryIdsToScoredMatches(std::move(history_ids), lower_raw_string,
230 template_url_service, bookmark_model, 238 template_url_service, bookmark_model,
231 &temp_scored_items); 239 &temp_scored_items);
232 scored_items.insert(scored_items.end(), temp_scored_items.begin(), 240 scored_items.insert(scored_items.end(), temp_scored_items.begin(),
233 temp_scored_items.end()); 241 temp_scored_items.end());
234 } 242 }
235 // Select and sort only the top |max_matches| results. 243 // Select and sort only the top |max_matches| results.
236 if (scored_items.size() > max_matches) { 244 if (scored_items.size() > max_matches) {
237 std::partial_sort(scored_items.begin(), scored_items.begin() + max_matches, 245 std::partial_sort(scored_items.begin(), scored_items.begin() + max_matches,
238 scored_items.end(), 246 scored_items.end(),
239 ScoredHistoryMatch::MatchScoreGreater); 247 ScoredHistoryMatch::MatchScoreGreater);
240 scored_items.resize(max_matches); 248 scored_items.resize(max_matches);
241 } else { 249 } else {
(...skipping 235 matching lines...)
477 // post_scoring_item_count_ 485 // post_scoring_item_count_
478 } 486 }
479 487
480 bool URLIndexPrivateData::Empty() const { 488 bool URLIndexPrivateData::Empty() const {
481 return history_info_map_.empty(); 489 return history_info_map_.empty();
482 } 490 }
483 491
484 void URLIndexPrivateData::Clear() { 492 void URLIndexPrivateData::Clear() {
485 last_time_rebuilt_from_history_ = base::Time(); 493 last_time_rebuilt_from_history_ = base::Time();
486 word_list_.clear(); 494 word_list_.clear();
487 available_words_.clear(); 495 available_words_ = {};
488 word_map_.clear(); 496 word_map_.clear();
489 char_word_map_.clear(); 497 char_word_map_.clear();
490 word_id_history_map_.clear(); 498 word_id_history_map_.clear();
491 history_id_word_map_.clear(); 499 history_id_word_map_.clear();
492 history_info_map_.clear(); 500 history_info_map_.clear();
493 word_starts_map_.clear(); 501 word_starts_map_.clear();
494 } 502 }
495 503
496 URLIndexPrivateData::~URLIndexPrivateData() {} 504 URLIndexPrivateData::~URLIndexPrivateData() {}
497 505
498 HistoryIDSet URLIndexPrivateData::HistoryIDSetFromWords( 506 HistoryIDVector URLIndexPrivateData::HistoryIDsFromWords(
499 const String16Vector& unsorted_words) { 507 const String16Vector& unsorted_words) {
508 // The name of the histogram wasn't changed to HistoryIdsFromWords so that
509 // old data remains valid.
Peter Kasting 2017/02/18 01:46:31 Should we worry about this? Maybe it's OK to just
dyaroshev 2017/02/18 11:48:14 Well, it's for you mostly - I don't have access to
Mark P 2017/02/24 00:34:16 Might as well keep the name of the histogram to ke
500 SCOPED_UMA_HISTOGRAM_TIMER("Omnibox.HistoryQuickHistoryIDSetFromWords"); 510 SCOPED_UMA_HISTOGRAM_TIMER("Omnibox.HistoryQuickHistoryIDSetFromWords");
501 // Break the terms down into individual terms (words), get the candidate 511 // Break the terms down into individual terms (words), get the candidate
502 // set for each term, and intersect each to get a final candidate list. 512 // set for each term, and intersect each to get a final candidate list.
503 // Note that a single 'term' from the user's perspective might be 513 // Note that a single 'term' from the user's perspective might be
504 // a string like "http://www.somewebsite.com" which, from our perspective, 514 // a string like "http://www.somewebsite.com" which, from our perspective,
505 // is four words: 'http', 'www', 'somewebsite', and 'com'. 515 // is four words: 'http', 'www', 'somewebsite', and 'com'.
506 HistoryIDSet history_id_set; 516 HistoryIDVector history_ids;
dyaroshev 2017/02/18 01:22:14 Unused variable.
dyaroshev 2017/02/18 11:48:14 Done.
507 String16Vector words(unsorted_words); 517 String16Vector words(unsorted_words);
508 // Sort the words into the longest first as such are likely to narrow down 518 // Sort the words into the longest first as such are likely to narrow down
509 // the results quicker. Also, single character words are the most expensive 519 // the results quicker. Also, single character words are the most expensive
510 // to process so save them for last. 520 // to process so save them for last.
511 std::sort(words.begin(), words.end(), LengthGreater); 521 std::sort(words.begin(), words.end(), LengthGreater);
512 for (String16Vector::iterator iter = words.begin(); iter != words.end(); 522
513 ++iter) { 523 return IntersectSets<HistoryIDVector>(
514 base::string16 uni_word = *iter; 524 words.begin(), words.end(),
515 HistoryIDSet term_history_set = HistoryIDsForTerm(uni_word); 525 [this](const base::string16& word) { return HistoryIDsForTerm(word); });
516 if (term_history_set.empty()) { 526 }
517 history_id_set.clear(); 527
518 break; 528 void URLIndexPrivateData::TrimHistoryIdsPool(
519 } 529 HistoryIDVector* history_ids) const {
520 if (iter == words.begin()) { 530 constexpr size_t kItemsToScoreLimit = 500;
521 history_id_set.swap(term_history_set); 531
Peter Kasting 2017/02/18 01:46:31 Nit: No blank line
dyaroshev 2017/02/18 11:48:14 Done.
522 } else { 532 if (history_ids->size() < kItemsToScoreLimit)
523 HistoryIDSet new_history_id_set = base::STLSetIntersection<HistoryIDSet>( 533 return;
524 history_id_set, term_history_set); 534
525 history_id_set.swap(new_history_id_set); 535 // Trim down the set by sorting by typed-count, visit-count, and last
526 } 536 // visit.
527 } 537 auto new_end = history_ids->begin() + kItemsToScoreLimit;
528 return history_id_set; 538 HistoryItemFactorGreater item_factor_functor(history_info_map_);
539
540 std::nth_element(history_ids->begin(), new_end, history_ids->end(),
541 item_factor_functor);
542 history_ids->erase(new_end, history_ids->end());
529 } 543 }
530 544
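TrimHistoryIdsPool() above replaces the old partial_sort-and-copy dance with std::nth_element plus erase; a std-only sketch of that trimming step, with a plain descending-int comparator standing in for HistoryItemFactorGreater:

#include <algorithm>
#include <cstddef>
#include <vector>

// Keeps only the |limit| best-scoring ids. nth_element partitions without
// fully ordering the kept range, which is cheaper than partial_sort when the
// survivors get sorted later anyway (here, by ScoredHistoryMatch score).
void TrimToTopN(std::vector<int>* scores, std::size_t limit) {
  if (scores->size() <= limit)
    return;
  auto new_end = scores->begin() + limit;
  std::nth_element(scores->begin(), new_end, scores->end(),
                   [](int a, int b) { return a > b; });
  scores->erase(new_end, scores->end());
}

std::partial_sort would additionally leave the kept range ordered, which is what the final top-|max_matches| selection in Search() relies on; for the pre-scoring trim only membership in the top N matters.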
531 HistoryIDSet URLIndexPrivateData::HistoryIDsForTerm( 545 HistoryIDSet URLIndexPrivateData::HistoryIDsForTerm(
532 const base::string16& term) { 546 const base::string16& term) {
533 if (term.empty()) 547 if (term.empty())
534 return HistoryIDSet(); 548 return HistoryIDSet();
535 549
536 // TODO(mrossetti): Consider optimizing for very common terms such as 550 // TODO(mrossetti): Consider optimizing for very common terms such as
537 // 'http[s]', 'www', 'com', etc. Or collect the top 100 more frequently 551 // 'http[s]', 'www', 'com', etc. Or collect the top 100 more frequently
538 // occuring words in the user's searches. 552 // occuring words in the user's searches.
(...skipping 46 matching lines...)
585 599
586 // Reduce the word set with any leftover, unprocessed characters. 600 // Reduce the word set with any leftover, unprocessed characters.
587 if (!unique_chars.empty()) { 601 if (!unique_chars.empty()) {
588 WordIDSet leftover_set(WordIDSetForTermChars(unique_chars)); 602 WordIDSet leftover_set(WordIDSetForTermChars(unique_chars));
589 // We might come up empty on the leftovers. 603 // We might come up empty on the leftovers.
590 if (leftover_set.empty()) { 604 if (leftover_set.empty()) {
591 search_term_cache_[term] = SearchTermCacheItem(); 605 search_term_cache_[term] = SearchTermCacheItem();
592 return HistoryIDSet(); 606 return HistoryIDSet();
593 } 607 }
594 // Or there may not have been a prefix from which to start. 608 // Or there may not have been a prefix from which to start.
595 if (prefix_chars.empty()) { 609 word_id_set = prefix_chars.empty() ? std::move(leftover_set)
596 word_id_set.swap(leftover_set); 610 : base::STLSetIntersection<WordIDSet>(
597 } else { 611 word_id_set, leftover_set);
598 WordIDSet new_word_id_set = base::STLSetIntersection<WordIDSet>(
599 word_id_set, leftover_set);
600 word_id_set.swap(new_word_id_set);
601 }
602 } 612 }
603 613
604 // We must filter the word list because the resulting word set surely 614 // We must filter the word list because the resulting word set surely
605 // contains words which do not have the search term as a proper subset. 615 // contains words which do not have the search term as a proper subset.
606 for (WordIDSet::iterator word_set_iter = word_id_set.begin(); 616 for (WordIDSet::iterator word_set_iter = word_id_set.begin();
607 word_set_iter != word_id_set.end(); ) { 617 word_set_iter != word_id_set.end(); ) {
608 if (word_list_[*word_set_iter].find(term) == base::string16::npos) 618 if (word_list_[*word_set_iter].find(term) == base::string16::npos)
609 word_set_iter = word_id_set.erase(word_set_iter); 619 word_set_iter = word_id_set.erase(word_set_iter);
610 else 620 else
611 ++word_set_iter; 621 ++word_set_iter;
612 } 622 }
613 } else { 623 } else {
614 word_id_set = WordIDSetForTermChars(Char16SetFromString16(term)); 624 word_id_set = WordIDSetForTermChars(Char16SetFromString16(term));
615 } 625 }
616 626
617 // If any words resulted then we can compose a set of history IDs by unioning 627 // If any words resulted then we can compose a set of history IDs by unioning
618 // the sets from each word. 628 // the sets from each word.
619 HistoryIDSet history_id_set; 629 HistoryIDSet history_id_set;
620 if (!word_id_set.empty()) { 630 if (!word_id_set.empty()) {
621 for (WordIDSet::iterator word_id_iter = word_id_set.begin(); 631 for (WordID word_id : word_id_set) {
622 word_id_iter != word_id_set.end(); ++word_id_iter) {
623 WordID word_id = *word_id_iter;
624 WordIDHistoryMap::iterator word_iter = word_id_history_map_.find(word_id); 632 WordIDHistoryMap::iterator word_iter = word_id_history_map_.find(word_id);
625 if (word_iter != word_id_history_map_.end()) { 633 if (word_iter != word_id_history_map_.end()) {
626 HistoryIDSet& word_history_id_set(word_iter->second); 634 HistoryIDSet& word_history_id_set(word_iter->second);
627 history_id_set.insert(word_history_id_set.begin(), 635 history_id_set.insert(word_history_id_set.begin(),
628 word_history_id_set.end()); 636 word_history_id_set.end());
629 } 637 }
630 } 638 }
631 } 639 }
632 640
633 // Record a new cache entry for this word if the term is longer than 641 // Record a new cache entry for this word if the term is longer than
634 // a single character. 642 // a single character.
635 if (term_length > 1) 643 if (term_length > 1)
636 search_term_cache_[term] = SearchTermCacheItem(word_id_set, history_id_set); 644 search_term_cache_[term] = SearchTermCacheItem(word_id_set, history_id_set);
637 645
638 return history_id_set; 646 return history_id_set;
639 } 647 }
640 648
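The final loop of HistoryIDsForTerm() unions the per-word history-ID sets via std::set's ranged insert; a small std-only sketch of that union step (map and key types are illustrative):

#include <map>
#include <set>

// Unions the history-ID sets of every word id that has an entry in
// |word_to_history_ids|; ids without an entry are skipped.
std::set<long> UnionHistoryIds(
    const std::map<int, std::set<long>>& word_to_history_ids,
    const std::set<int>& word_ids) {
  std::set<long> result;
  for (int word_id : word_ids) {
    auto it = word_to_history_ids.find(word_id);
    if (it != word_to_history_ids.end())
      result.insert(it->second.begin(), it->second.end());
  }
  return result;
}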
641 WordIDSet URLIndexPrivateData::WordIDSetForTermChars( 649 WordIDSet URLIndexPrivateData::WordIDSetForTermChars(
642 const Char16Set& term_chars) { 650 const Char16Set& term_chars) {
643 WordIDSet word_id_set; 651 return IntersectSets<WordIDSet>(term_chars.begin(), term_chars.end(),
644 for (Char16Set::const_iterator c_iter = term_chars.begin(); 652 [&](base::char16 c) {
645 c_iter != term_chars.end(); ++c_iter) { 653 auto char_iter = char_word_map_.find(c);
646 CharWordIDMap::iterator char_iter = char_word_map_.find(*c_iter); 654 if (char_iter == char_word_map_.end())
647 if (char_iter == char_word_map_.end()) { 655 return WordIDSet();
648 // A character was not found so there are no matching results: bail. 656 return char_iter->second;
Peter Kasting 2017/02/18 01:46:31 Nit: Shorter: auto GetSet = [this](base::char16
dyaroshev 2017/02/18 11:48:13 Done.
649 word_id_set.clear(); 657 });
650 break;
651 }
652 WordIDSet& char_word_id_set(char_iter->second);
653 // It is possible for there to no longer be any words associated with
654 // a particular character. Give up in that case.
655 if (char_word_id_set.empty()) {
656 word_id_set.clear();
657 break;
658 }
659
660 if (c_iter == term_chars.begin()) {
661 // First character results becomes base set of results.
662 word_id_set = char_word_id_set;
663 } else {
664 // Subsequent character results get intersected in.
665 WordIDSet new_word_id_set = base::STLSetIntersection<WordIDSet>(
666 word_id_set, char_word_id_set);
667 word_id_set.swap(new_word_id_set);
668 }
669 }
670 return word_id_set;
671 } 658 }
672 659
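Peter Kasting's truncated nit above suggests naming the lambda (something like "auto GetSet = ..."); a hedged std-only sketch of that shape, not the reviewer's exact code:

#include <map>
#include <set>

using WordIdSet = std::set<int>;

// Binds the per-character lookup once so the call into the intersection
// helper stays short. Unknown characters yield an empty set, letting the
// intersection short-circuit.
WordIdSet LookUpOneChar(const std::map<char16_t, WordIdSet>& char_word_map,
                        char16_t c) {
  auto get_set = [&char_word_map](char16_t ch) {
    auto it = char_word_map.find(ch);
    return it == char_word_map.end() ? WordIdSet() : it->second;
  };
  return get_set(c);
}

Note that the lambda returns the set by value; the CL appears to accept that copy since the intersection helper consumes the result immediately.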
673 void URLIndexPrivateData::HistoryIdSetToScoredMatches( 660 void URLIndexPrivateData::HistoryIdsToScoredMatches(
674 HistoryIDSet history_id_set, 661 HistoryIDVector history_ids,
675 const base::string16& lower_raw_string, 662 const base::string16& lower_raw_string,
676 const TemplateURLService* template_url_service, 663 const TemplateURLService* template_url_service,
677 bookmarks::BookmarkModel* bookmark_model, 664 bookmarks::BookmarkModel* bookmark_model,
678 ScoredHistoryMatches* scored_items) const { 665 ScoredHistoryMatches* scored_items) const {
679 if (history_id_set.empty()) 666 if (history_ids.empty())
680 return; 667 return;
681 668
682 // Break up the raw search string (complete with escaped URL elements) into 669 // Break up the raw search string (complete with escaped URL elements) into
683 // 'terms' (as opposed to 'words'; see comment in HistoryItemsForTerms()). 670 // 'terms' (as opposed to 'words'; see comment in HistoryItemsForTerms()).
684 // We only want to break up the search string on 'true' whitespace rather than 671 // We only want to break up the search string on 'true' whitespace rather than
685 // escaped whitespace. For example, when the user types 672 // escaped whitespace. For example, when the user types
686 // "colspec=ID%20Mstone Release" we get two 'terms': "colspec=id%20mstone" and 673 // "colspec=ID%20Mstone Release" we get two 'terms': "colspec=id%20mstone" and
687 // "release". 674 // "release".
688 String16Vector lower_raw_terms = 675 String16Vector lower_raw_terms =
689 base::SplitString(lower_raw_string, base::kWhitespaceUTF16, 676 base::SplitString(lower_raw_string, base::kWhitespaceUTF16,
690 base::KEEP_WHITESPACE, base::SPLIT_WANT_NONEMPTY); 677 base::KEEP_WHITESPACE, base::SPLIT_WANT_NONEMPTY);
691 678
692 // Don't score matches when there are no terms to score against. (It's 679 // Don't score matches when there are no terms to score against. (It's
693 // possible that the word break iterater that extracts words to search for in 680 // possible that the word break iterater that extracts words to search for in
694 // the database allows some whitespace "words" whereas SplitString excludes a 681 // the database allows some whitespace "words" whereas SplitString excludes a
695 // long list of whitespace.) One could write a scoring function that gives a 682 // long list of whitespace.) One could write a scoring function that gives a
696 // reasonable order to matches when there are no terms (i.e., all the words 683 // reasonable order to matches when there are no terms (i.e., all the words
697 // are some form of whitespace), but this is such a rare edge case that it's 684 // are some form of whitespace), but this is such a rare edge case that it's
698 // not worth the time. 685 // not worth the time.
699 if (lower_raw_terms.empty()) 686 if (lower_raw_terms.empty())
700 return; 687 return;
701 688
702 WordStarts lower_terms_to_word_starts_offsets; 689 WordStarts lower_terms_to_word_starts_offsets;
703 CalculateWordStartsOffsets(lower_raw_terms, 690 CalculateWordStartsOffsets(lower_raw_terms,
704 &lower_terms_to_word_starts_offsets); 691 &lower_terms_to_word_starts_offsets);
705 692
706 // Filter bad matches and other matches we don't want to display. 693 // Filter bad matches and other matches we don't want to display.
707 for (auto it = history_id_set.begin();;) { 694 auto filter = [this, template_url_service](const HistoryID history_id) {
708 it = std::find_if(it, history_id_set.end(), 695 return ShouldFilter(history_id, template_url_service);
709 [this, template_url_service](const HistoryID history_id) { 696 };
710 return ShouldFilter(history_id, template_url_service); 697 history_ids.erase(
711 }); 698 std::remove_if(history_ids.begin(), history_ids.end(), filter),
712 if (it == history_id_set.end()) 699 history_ids.end());
Peter Kasting 2017/02/18 01:46:31 Nit: Pulling history_ids.end() out into a temp |end| ...
dyaroshev 2017/02/18 11:48:14 I don't like invalid iterators lying around and it
Peter Kasting 2017/02/20 10:02:54 That's a reasonable objection to have.
713 break;
714 it = history_id_set.erase(it);
715 }
716 700
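The filtering just above is the standard erase-remove idiom on the new HistoryIDVector, replacing the old iterate-and-erase loop that a std::set required; a std-only sketch with a trivial predicate standing in for ShouldFilter():

#include <algorithm>
#include <vector>

// std::remove_if compacts the elements to keep at the front, and the single
// erase() drops the tail in one pass with no iterator juggling.
void EraseEvenIds(std::vector<long>* ids) {
  ids->erase(std::remove_if(ids->begin(), ids->end(),
                            [](long id) { return id % 2 == 0; }),
             ids->end());
}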
717 // Score the matches. 701 // Score the matches.
718 const size_t num_matches = history_id_set.size(); 702 const size_t num_matches = history_ids.size();
719 const base::Time now = base::Time::Now(); 703 const base::Time now = base::Time::Now();
720 std::transform( 704 std::transform(
721 history_id_set.begin(), history_id_set.end(), 705 history_ids.begin(), history_ids.end(), std::back_inserter(*scored_items),
722 std::back_inserter(*scored_items), [&](const HistoryID history_id) { 706 [&](const HistoryID history_id) {
723 auto hist_pos = history_info_map_.find(history_id); 707 auto hist_pos = history_info_map_.find(history_id);
724 const history::URLRow& hist_item = hist_pos->second.url_row; 708 const history::URLRow& hist_item = hist_pos->second.url_row;
725 auto starts_pos = word_starts_map_.find(history_id); 709 auto starts_pos = word_starts_map_.find(history_id);
726 DCHECK(starts_pos != word_starts_map_.end()); 710 DCHECK(starts_pos != word_starts_map_.end());
727 return ScoredHistoryMatch( 711 return ScoredHistoryMatch(
728 hist_item, hist_pos->second.visits, lower_raw_string, 712 hist_item, hist_pos->second.visits, lower_raw_string,
729 lower_raw_terms, lower_terms_to_word_starts_offsets, 713 lower_raw_terms, lower_terms_to_word_starts_offsets,
730 starts_pos->second, 714 starts_pos->second,
731 bookmark_model && bookmark_model->IsBookmarked(hist_item.url()), 715 bookmark_model && bookmark_model->IsBookmarked(hist_item.url()),
732 num_matches, now); 716 num_matches, now);
(...skipping 90 matching lines...)
823 HistoryID history_id = static_cast<HistoryID>(row.id()); 807 HistoryID history_id = static_cast<HistoryID>(row.id());
824 // Split URL into individual, unique words then add in the title words. 808 // Split URL into individual, unique words then add in the title words.
825 const GURL& gurl(row.url()); 809 const GURL& gurl(row.url());
826 const base::string16& url = 810 const base::string16& url =
827 bookmarks::CleanUpUrlForMatching(gurl, nullptr); 811 bookmarks::CleanUpUrlForMatching(gurl, nullptr);
828 String16Set url_words = String16SetFromString16(url, 812 String16Set url_words = String16SetFromString16(url,
829 word_starts ? &word_starts->url_word_starts_ : nullptr); 813 word_starts ? &word_starts->url_word_starts_ : nullptr);
830 const base::string16& title = bookmarks::CleanUpTitleForMatching(row.title()); 814 const base::string16& title = bookmarks::CleanUpTitleForMatching(row.title());
831 String16Set title_words = String16SetFromString16(title, 815 String16Set title_words = String16SetFromString16(title,
832 word_starts ? &word_starts->title_word_starts_ : nullptr); 816 word_starts ? &word_starts->title_word_starts_ : nullptr);
833 String16Set words = base::STLSetUnion<String16Set>(url_words, title_words); 817 for (const auto& word :
834 for (String16Set::iterator word_iter = words.begin(); 818 base::STLSetUnion<String16Set>(url_words, title_words))
835 word_iter != words.end(); ++word_iter) 819 AddWordToIndex(word, history_id);
836 AddWordToIndex(*word_iter, history_id);
837 820
838 search_term_cache_.clear(); // Invalidate the term cache. 821 search_term_cache_.clear(); // Invalidate the term cache.
839 } 822 }
840 823
841 void URLIndexPrivateData::AddWordToIndex(const base::string16& term, 824 void URLIndexPrivateData::AddWordToIndex(const base::string16& term,
842 HistoryID history_id) { 825 HistoryID history_id) {
843 WordMap::iterator word_pos = word_map_.find(term); 826 WordMap::iterator word_pos = word_map_.lower_bound(term);
844 if (word_pos != word_map_.end()) 827
845 UpdateWordHistory(word_pos->second, history_id); 828 // Adding a new word (i.e. a word that is not already in the word index).
846 else 829 if (word_pos->first != term) {
847 AddWordHistory(term, history_id); 830 word_pos =
831 word_map_.emplace_hint(word_pos, term, AddNewWordToWordList(term));
832
833 // For each character in the newly added word add the word to the character
834 // index.
835 for (base::char16 uni_char : Char16SetFromString16(term))
836 char_word_map_[uni_char].insert(word_pos->second);
837 }
838
839 DCHECK_EQ(word_pos->first, term);
840
841 word_id_history_map_[word_pos->second].insert(history_id);
842 history_id_word_map_[history_id].insert(word_pos->second);
848 } 843 }
849 844
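The rewritten AddWordToIndex() above uses the lower_bound/emplace_hint pattern so the lookup and the insertion share one tree descent; a std-only sketch (helper name and types are illustrative):

#include <map>
#include <string>

// Returns the id for |word|, inserting |next_id| if the word is new. The
// lower_bound result doubles as the insertion hint, so a miss costs no
// second search.
int GetOrAddWord(std::map<std::u16string, int>* word_map,
                 const std::u16string& word,
                 int next_id) {
  auto pos = word_map->lower_bound(word);
  if (pos == word_map->end() || pos->first != word)
    pos = word_map->emplace_hint(pos, word, next_id);
  return pos->second;
}

The sketch keeps the usual end() guard before dereferencing the lower_bound result; as quoted above, the patch's `if (word_pos->first != term)` check would dereference an end() iterator whenever the new word sorts after every existing key.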
850 void URLIndexPrivateData::AddWordHistory(const base::string16& term, 845 WordID URLIndexPrivateData::AddNewWordToWordList(const base::string16& term) {
851 HistoryID history_id) {
852 WordID word_id = word_list_.size(); 846 WordID word_id = word_list_.size();
853 if (available_words_.empty()) { 847 if (available_words_.empty()) {
854 word_list_.push_back(term); 848 word_list_.push_back(term);
855 } else { 849 return word_id;
856 word_id = *(available_words_.begin());
857 word_list_[word_id] = term;
858 available_words_.erase(word_id);
859 } 850 }
860 word_map_[term] = word_id;
861 851
862 HistoryIDSet history_id_set; 852 word_id = available_words_.top();
863 history_id_set.insert(history_id); 853 available_words_.pop();
864 word_id_history_map_[word_id] = history_id_set; 854 return word_id;
865 AddToHistoryIDWordMap(history_id, word_id);
866
867 // For each character in the newly added word (i.e. a word that is not
868 // already in the word index), add the word to the character index.
869 Char16Set characters = Char16SetFromString16(term);
870 for (Char16Set::iterator uni_char_iter = characters.begin();
871 uni_char_iter != characters.end(); ++uni_char_iter) {
872 base::char16 uni_char = *uni_char_iter;
873 CharWordIDMap::iterator char_iter = char_word_map_.find(uni_char);
874 if (char_iter != char_word_map_.end()) {
875 // Update existing entry in the char/word index.
876 WordIDSet& word_id_set(char_iter->second);
877 word_id_set.insert(word_id);
878 } else {
879 // Create a new entry in the char/word index.
880 WordIDSet word_id_set;
881 word_id_set.insert(word_id);
882 char_word_map_[uni_char] = word_id_set;
883 }
884 }
885 }
886
887 void URLIndexPrivateData::UpdateWordHistory(WordID word_id,
888 HistoryID history_id) {
889 WordIDHistoryMap::iterator history_pos = word_id_history_map_.find(word_id);
890 DCHECK(history_pos != word_id_history_map_.end());
891 HistoryIDSet& history_id_set(history_pos->second);
892 history_id_set.insert(history_id);
893 AddToHistoryIDWordMap(history_id, word_id);
894 }
895
896 void URLIndexPrivateData::AddToHistoryIDWordMap(HistoryID history_id,
897 WordID word_id) {
898 HistoryIDWordMap::iterator iter = history_id_word_map_.find(history_id);
899 if (iter != history_id_word_map_.end()) {
900 WordIDSet& word_id_set(iter->second);
901 word_id_set.insert(word_id);
902 } else {
903 WordIDSet word_id_set;
904 word_id_set.insert(word_id);
905 history_id_word_map_[history_id] = word_id_set;
906 }
907 } 855 }
908 856
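AddNewWordToWordList() above hands out word ids from a free list (available_words_ now exposes top()/pop()/push(), and Clear() resets it with `available_words_ = {}`); a std-only sketch of that allocator shape, assuming a std::stack-like container, which is a guess based on those calls:

#include <stack>
#include <string>
#include <vector>

// Prefers recycling slots freed by earlier removals so the word list does
// not grow without bound.
class WordIdAllocator {
 public:
  int Allocate(std::vector<std::u16string>* words,
               const std::u16string& word) {
    if (free_ids_.empty()) {
      words->push_back(word);
      return static_cast<int>(words->size()) - 1;
    }
    int id = free_ids_.top();
    free_ids_.pop();
    (*words)[id] = word;
    return id;
  }

  void Release(std::vector<std::u16string>* words, int id) {
    (*words)[id].clear();
    free_ids_.push(id);
  }

  // std::stack has no clear(); assigning a default-constructed value resets
  // it, mirroring Clear()'s `available_words_ = {}` above.
  void Reset() { free_ids_ = {}; }

 private:
  std::stack<int> free_ids_;
};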
909 void URLIndexPrivateData::RemoveRowFromIndex(const history::URLRow& row) { 857 void URLIndexPrivateData::RemoveRowFromIndex(const history::URLRow& row) {
910 RemoveRowWordsFromIndex(row); 858 RemoveRowWordsFromIndex(row);
911 HistoryID history_id = static_cast<HistoryID>(row.id()); 859 HistoryID history_id = static_cast<HistoryID>(row.id());
912 history_info_map_.erase(history_id); 860 history_info_map_.erase(history_id);
913 word_starts_map_.erase(history_id); 861 word_starts_map_.erase(history_id);
914 } 862 }
915 863
916 void URLIndexPrivateData::RemoveRowWordsFromIndex(const history::URLRow& row) { 864 void URLIndexPrivateData::RemoveRowWordsFromIndex(const history::URLRow& row) {
917 // Remove the entries in history_id_word_map_ and word_id_history_map_ for 865 // Remove the entries in history_id_word_map_ and word_id_history_map_ for
918 // this row. 866 // this row.
919 HistoryID history_id = static_cast<HistoryID>(row.id()); 867 HistoryID history_id = static_cast<HistoryID>(row.id());
920 WordIDSet word_id_set = history_id_word_map_[history_id]; 868 WordIDSet word_id_set = history_id_word_map_[history_id];
921 history_id_word_map_.erase(history_id); 869 history_id_word_map_.erase(history_id);
922 870
923 // Reconcile any changes to word usage. 871 // Reconcile any changes to word usage.
924 for (WordIDSet::iterator word_id_iter = word_id_set.begin(); 872 for (WordID word_id : word_id_set) {
925 word_id_iter != word_id_set.end(); ++word_id_iter) { 873 auto word_id_history_map_iter = word_id_history_map_.find(word_id);
926 WordID word_id = *word_id_iter; 874 DCHECK(word_id_history_map_iter != word_id_history_map_.end());
927 word_id_history_map_[word_id].erase(history_id); 875
928 if (!word_id_history_map_[word_id].empty()) 876 word_id_history_map_iter->second.erase(history_id);
929 continue; // The word is still in use. 877 if (!word_id_history_map_iter->second.empty())
878 continue;
930 879
931 // The word is no longer in use. Reconcile any changes to character usage. 880 // The word is no longer in use. Reconcile any changes to character usage.
932 base::string16 word = word_list_[word_id]; 881 base::string16 word = word_list_[word_id];
933 Char16Set characters = Char16SetFromString16(word); 882 for (base::char16 uni_char : Char16SetFromString16(word)) {
934 for (Char16Set::iterator uni_char_iter = characters.begin(); 883 auto char_word_map_iter = char_word_map_.find(uni_char);
935 uni_char_iter != characters.end(); ++uni_char_iter) { 884 char_word_map_iter->second.erase(word_id);
936 base::char16 uni_char = *uni_char_iter; 885 if (char_word_map_iter->second.empty())
937 char_word_map_[uni_char].erase(word_id); 886 char_word_map_.erase(char_word_map_iter);
938 if (char_word_map_[uni_char].empty())
939 char_word_map_.erase(uni_char); // No longer in use.
940 } 887 }
941 888
942 // Complete the removal of references to the word. 889 // Complete the removal of references to the word.
943 word_id_history_map_.erase(word_id); 890 word_id_history_map_.erase(word_id_history_map_iter);
944 word_map_.erase(word); 891 word_map_.erase(word);
945 word_list_[word_id] = base::string16(); 892 word_list_[word_id] = base::string16();
946 available_words_.insert(word_id); 893 available_words_.push(word_id);
947 } 894 }
948 } 895 }
949 896
950 void URLIndexPrivateData::ResetSearchTermCache() { 897 void URLIndexPrivateData::ResetSearchTermCache() {
951 for (SearchTermCacheMap::iterator iter = search_term_cache_.begin(); 898 for (auto& item : search_term_cache_)
952 iter != search_term_cache_.end(); ++iter) 899 item.second.used_ = false;
953 iter->second.used_ = false;
954 } 900 }
955 901
956 bool URLIndexPrivateData::SaveToFile(const base::FilePath& file_path) { 902 bool URLIndexPrivateData::SaveToFile(const base::FilePath& file_path) {
957 base::TimeTicks beginning_time = base::TimeTicks::Now(); 903 base::TimeTicks beginning_time = base::TimeTicks::Now();
958 InMemoryURLIndexCacheItem index_cache; 904 InMemoryURLIndexCacheItem index_cache;
959 SavePrivateData(&index_cache); 905 SavePrivateData(&index_cache);
960 std::string data; 906 std::string data;
961 if (!index_cache.SerializeToString(&data)) { 907 if (!index_cache.SerializeToString(&data)) {
962 LOG(WARNING) << "Failed to serialize the InMemoryURLIndex cache."; 908 LOG(WARNING) << "Failed to serialize the InMemoryURLIndex cache.";
963 return false; 909 return false;
(...skipping 24 matching lines...)
988 SaveWordIDHistoryMap(cache); 934 SaveWordIDHistoryMap(cache);
989 SaveHistoryInfoMap(cache); 935 SaveHistoryInfoMap(cache);
990 SaveWordStartsMap(cache); 936 SaveWordStartsMap(cache);
991 } 937 }
992 938
993 void URLIndexPrivateData::SaveWordList(InMemoryURLIndexCacheItem* cache) const { 939 void URLIndexPrivateData::SaveWordList(InMemoryURLIndexCacheItem* cache) const {
994 if (word_list_.empty()) 940 if (word_list_.empty())
995 return; 941 return;
996 WordListItem* list_item = cache->mutable_word_list(); 942 WordListItem* list_item = cache->mutable_word_list();
997 list_item->set_word_count(word_list_.size()); 943 list_item->set_word_count(word_list_.size());
998 for (String16Vector::const_iterator iter = word_list_.begin(); 944 for (const base::string16& word : word_list_)
999 iter != word_list_.end(); ++iter) 945 list_item->add_word(base::UTF16ToUTF8(word));
1000 list_item->add_word(base::UTF16ToUTF8(*iter));
1001 } 946 }
1002 947
1003 void URLIndexPrivateData::SaveWordMap(InMemoryURLIndexCacheItem* cache) const { 948 void URLIndexPrivateData::SaveWordMap(InMemoryURLIndexCacheItem* cache) const {
1004 if (word_map_.empty()) 949 if (word_map_.empty())
1005 return; 950 return;
1006 WordMapItem* map_item = cache->mutable_word_map(); 951 WordMapItem* map_item = cache->mutable_word_map();
1007 map_item->set_item_count(word_map_.size()); 952 map_item->set_item_count(word_map_.size());
1008 for (WordMap::const_iterator iter = word_map_.begin(); 953 for (const auto& elem : word_map_) {
1009 iter != word_map_.end(); ++iter) {
1010 WordMapEntry* map_entry = map_item->add_word_map_entry(); 954 WordMapEntry* map_entry = map_item->add_word_map_entry();
1011 map_entry->set_word(base::UTF16ToUTF8(iter->first)); 955 map_entry->set_word(base::UTF16ToUTF8(elem.first));
1012 map_entry->set_word_id(iter->second); 956 map_entry->set_word_id(elem.second);
1013 } 957 }
1014 } 958 }
1015 959
1016 void URLIndexPrivateData::SaveCharWordMap( 960 void URLIndexPrivateData::SaveCharWordMap(
1017 InMemoryURLIndexCacheItem* cache) const { 961 InMemoryURLIndexCacheItem* cache) const {
1018 if (char_word_map_.empty()) 962 if (char_word_map_.empty())
1019 return; 963 return;
1020 CharWordMapItem* map_item = cache->mutable_char_word_map(); 964 CharWordMapItem* map_item = cache->mutable_char_word_map();
1021 map_item->set_item_count(char_word_map_.size()); 965 map_item->set_item_count(char_word_map_.size());
1022 for (CharWordIDMap::const_iterator iter = char_word_map_.begin(); 966 for (const auto& entry : char_word_map_) {
1023 iter != char_word_map_.end(); ++iter) {
1024 CharWordMapEntry* map_entry = map_item->add_char_word_map_entry(); 967 CharWordMapEntry* map_entry = map_item->add_char_word_map_entry();
1025 map_entry->set_char_16(iter->first); 968 map_entry->set_char_16(entry.first);
1026 const WordIDSet& word_id_set(iter->second); 969 const WordIDSet& word_id_set(entry.second);
1027 map_entry->set_item_count(word_id_set.size()); 970 map_entry->set_item_count(word_id_set.size());
1028 for (WordIDSet::const_iterator set_iter = word_id_set.begin(); 971 for (WordID word_id : word_id_set)
1029 set_iter != word_id_set.end(); ++set_iter) 972 map_entry->add_word_id(word_id);
1030 map_entry->add_word_id(*set_iter);
1031 } 973 }
1032 } 974 }
1033 975
1034 void URLIndexPrivateData::SaveWordIDHistoryMap( 976 void URLIndexPrivateData::SaveWordIDHistoryMap(
1035 InMemoryURLIndexCacheItem* cache) const { 977 InMemoryURLIndexCacheItem* cache) const {
1036 if (word_id_history_map_.empty()) 978 if (word_id_history_map_.empty())
1037 return; 979 return;
1038 WordIDHistoryMapItem* map_item = cache->mutable_word_id_history_map(); 980 WordIDHistoryMapItem* map_item = cache->mutable_word_id_history_map();
1039 map_item->set_item_count(word_id_history_map_.size()); 981 map_item->set_item_count(word_id_history_map_.size());
1040 for (WordIDHistoryMap::const_iterator iter = word_id_history_map_.begin(); 982 for (const auto& entry : word_id_history_map_) {
1041 iter != word_id_history_map_.end(); ++iter) {
1042 WordIDHistoryMapEntry* map_entry = 983 WordIDHistoryMapEntry* map_entry =
1043 map_item->add_word_id_history_map_entry(); 984 map_item->add_word_id_history_map_entry();
1044 map_entry->set_word_id(iter->first); 985 map_entry->set_word_id(entry.first);
1045 const HistoryIDSet& history_id_set(iter->second); 986 const HistoryIDSet& history_id_set(entry.second);
1046 map_entry->set_item_count(history_id_set.size()); 987 map_entry->set_item_count(history_id_set.size());
1047 for (HistoryIDSet::const_iterator set_iter = history_id_set.begin(); 988 for (HistoryID history_id : history_id_set)
1048 set_iter != history_id_set.end(); ++set_iter) 989 map_entry->add_history_id(history_id);
1049 map_entry->add_history_id(*set_iter);
1050 } 990 }
1051 } 991 }
1052 992
1053 void URLIndexPrivateData::SaveHistoryInfoMap( 993 void URLIndexPrivateData::SaveHistoryInfoMap(
1054 InMemoryURLIndexCacheItem* cache) const { 994 InMemoryURLIndexCacheItem* cache) const {
1055 if (history_info_map_.empty()) 995 if (history_info_map_.empty())
1056 return; 996 return;
1057 HistoryInfoMapItem* map_item = cache->mutable_history_info_map(); 997 HistoryInfoMapItem* map_item = cache->mutable_history_info_map();
1058 map_item->set_item_count(history_info_map_.size()); 998 map_item->set_item_count(history_info_map_.size());
1059 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin(); 999 for (const auto& entry : history_info_map_) {
1060 iter != history_info_map_.end(); ++iter) {
1061 HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry(); 1000 HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry();
1062 map_entry->set_history_id(iter->first); 1001 map_entry->set_history_id(entry.first);
1063 const history::URLRow& url_row(iter->second.url_row); 1002 const history::URLRow& url_row(entry.second.url_row);
1064 // Note: We only save information that contributes to the index so there 1003 // Note: We only save information that contributes to the index so there
1065 // is no need to save search_term_cache_ (not persistent). 1004 // is no need to save search_term_cache_ (not persistent).
1066 map_entry->set_visit_count(url_row.visit_count()); 1005 map_entry->set_visit_count(url_row.visit_count());
1067 map_entry->set_typed_count(url_row.typed_count()); 1006 map_entry->set_typed_count(url_row.typed_count());
1068 map_entry->set_last_visit(url_row.last_visit().ToInternalValue()); 1007 map_entry->set_last_visit(url_row.last_visit().ToInternalValue());
1069 map_entry->set_url(url_row.url().spec()); 1008 map_entry->set_url(url_row.url().spec());
1070 map_entry->set_title(base::UTF16ToUTF8(url_row.title())); 1009 map_entry->set_title(base::UTF16ToUTF8(url_row.title()));
1071 const VisitInfoVector& visits(iter->second.visits); 1010 for (const auto& visit : entry.second.visits) {
1072 for (VisitInfoVector::const_iterator visit_iter = visits.begin();
1073 visit_iter != visits.end(); ++visit_iter) {
1074 HistoryInfoMapEntry_VisitInfo* visit_info = map_entry->add_visits(); 1011 HistoryInfoMapEntry_VisitInfo* visit_info = map_entry->add_visits();
1075 visit_info->set_visit_time(visit_iter->first.ToInternalValue()); 1012 visit_info->set_visit_time(visit.first.ToInternalValue());
1076 visit_info->set_transition_type(visit_iter->second); 1013 visit_info->set_transition_type(visit.second);
1077 } 1014 }
1078 } 1015 }
1079 } 1016 }
1080 1017
1081 void URLIndexPrivateData::SaveWordStartsMap( 1018 void URLIndexPrivateData::SaveWordStartsMap(
1082 InMemoryURLIndexCacheItem* cache) const { 1019 InMemoryURLIndexCacheItem* cache) const {
1083 if (word_starts_map_.empty()) 1020 if (word_starts_map_.empty())
1084 return; 1021 return;
1085 // For unit testing: Enable saving of the cache as an earlier version to 1022 // For unit testing: Enable saving of the cache as an earlier version to
1086 // allow testing of cache file upgrading in ReadFromFile(). 1023 // allow testing of cache file upgrading in ReadFromFile().
1087 // TODO(mrossetti): Instead of intruding on production code with this kind of 1024 // TODO(mrossetti): Instead of intruding on production code with this kind of
1088 // test harness, save a copy of an older version cache with known results. 1025 // test harness, save a copy of an older version cache with known results.
1089 // Implement this when switching the caching over to SQLite. 1026 // Implement this when switching the caching over to SQLite.
1090 if (saved_cache_version_ < 1) 1027 if (saved_cache_version_ < 1)
1091 return; 1028 return;
1092 1029
1093 WordStartsMapItem* map_item = cache->mutable_word_starts_map(); 1030 WordStartsMapItem* map_item = cache->mutable_word_starts_map();
1094 map_item->set_item_count(word_starts_map_.size()); 1031 map_item->set_item_count(word_starts_map_.size());
1095 for (WordStartsMap::const_iterator iter = word_starts_map_.begin(); 1032 for (const auto& entry : word_starts_map_) {
1096 iter != word_starts_map_.end(); ++iter) {
1097 WordStartsMapEntry* map_entry = map_item->add_word_starts_map_entry(); 1033 WordStartsMapEntry* map_entry = map_item->add_word_starts_map_entry();
1098 map_entry->set_history_id(iter->first); 1034 map_entry->set_history_id(entry.first);
1099 const RowWordStarts& word_starts(iter->second); 1035 const RowWordStarts& word_starts(entry.second);
1100 for (WordStarts::const_iterator i = word_starts.url_word_starts_.begin(); 1036 for (auto url_word_start : word_starts.url_word_starts_)
1101 i != word_starts.url_word_starts_.end(); ++i) 1037 map_entry->add_url_word_starts(url_word_start);
1102 map_entry->add_url_word_starts(*i); 1038 for (auto title_word_start : word_starts.title_word_starts_)
1103 for (WordStarts::const_iterator i = word_starts.title_word_starts_.begin(); 1039 map_entry->add_title_word_starts(title_word_start);
1104 i != word_starts.title_word_starts_.end(); ++i)
1105 map_entry->add_title_word_starts(*i);
1106 } 1040 }
1107 } 1041 }
1108 1042
1109 bool URLIndexPrivateData::RestorePrivateData( 1043 bool URLIndexPrivateData::RestorePrivateData(
1110 const InMemoryURLIndexCacheItem& cache) { 1044 const InMemoryURLIndexCacheItem& cache) {
1111 last_time_rebuilt_from_history_ = 1045 last_time_rebuilt_from_history_ =
1112 base::Time::FromInternalValue(cache.last_rebuild_timestamp()); 1046 base::Time::FromInternalValue(cache.last_rebuild_timestamp());
1113 const base::TimeDelta rebuilt_ago = 1047 const base::TimeDelta rebuilt_ago =
1114 base::Time::Now() - last_time_rebuilt_from_history_; 1048 base::Time::Now() - last_time_rebuilt_from_history_;
1115 if ((rebuilt_ago > base::TimeDelta::FromDays(7)) || 1049 if ((rebuilt_ago > base::TimeDelta::FromDays(7)) ||
(...skipping 22 matching lines...)
1138 bool URLIndexPrivateData::RestoreWordList( 1072 bool URLIndexPrivateData::RestoreWordList(
1139 const InMemoryURLIndexCacheItem& cache) { 1073 const InMemoryURLIndexCacheItem& cache) {
1140 if (!cache.has_word_list()) 1074 if (!cache.has_word_list())
1141 return false; 1075 return false;
1142 const WordListItem& list_item(cache.word_list()); 1076 const WordListItem& list_item(cache.word_list());
1143 uint32_t expected_item_count = list_item.word_count(); 1077 uint32_t expected_item_count = list_item.word_count();
1144 uint32_t actual_item_count = list_item.word_size(); 1078 uint32_t actual_item_count = list_item.word_size();
1145 if (actual_item_count == 0 || actual_item_count != expected_item_count) 1079 if (actual_item_count == 0 || actual_item_count != expected_item_count)
1146 return false; 1080 return false;
1147 const RepeatedPtrField<std::string>& words(list_item.word()); 1081 const RepeatedPtrField<std::string>& words(list_item.word());
1148 for (RepeatedPtrField<std::string>::const_iterator iter = words.begin(); 1082 word_list_.reserve(words.size());
1149 iter != words.end(); ++iter) 1083 std::transform(
1150 word_list_.push_back(base::UTF8ToUTF16(*iter)); 1084 words.begin(), words.end(), std::back_inserter(word_list_),
1085 [](const std::string& word) { return base::UTF8ToUTF16(word); });
1151 return true; 1086 return true;
1152 } 1087 }
1153 1088
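RestoreWordList() above reserves once and then std::transform-s into a back inserter; a std-only sketch of that pattern (a trivial length transform replaces base::UTF8ToUTF16 to stay self-contained):

#include <algorithm>
#include <cstddef>
#include <iterator>
#include <string>
#include <vector>

// reserve() avoids repeated reallocation; transform + back_inserter appends
// one converted element per input element.
std::vector<std::size_t> WordLengths(const std::vector<std::string>& words) {
  std::vector<std::size_t> lengths;
  lengths.reserve(words.size());
  std::transform(words.begin(), words.end(), std::back_inserter(lengths),
                 [](const std::string& word) { return word.size(); });
  return lengths;
}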
1154 bool URLIndexPrivateData::RestoreWordMap( 1089 bool URLIndexPrivateData::RestoreWordMap(
1155 const InMemoryURLIndexCacheItem& cache) { 1090 const InMemoryURLIndexCacheItem& cache) {
1156 if (!cache.has_word_map()) 1091 if (!cache.has_word_map())
1157 return false; 1092 return false;
1158 const WordMapItem& list_item(cache.word_map()); 1093 const WordMapItem& list_item(cache.word_map());
1159 uint32_t expected_item_count = list_item.item_count(); 1094 uint32_t expected_item_count = list_item.item_count();
1160 uint32_t actual_item_count = list_item.word_map_entry_size(); 1095 uint32_t actual_item_count = list_item.word_map_entry_size();
1161 if (actual_item_count == 0 || actual_item_count != expected_item_count) 1096 if (actual_item_count == 0 || actual_item_count != expected_item_count)
1162 return false; 1097 return false;
1163 const RepeatedPtrField<WordMapEntry>& entries(list_item.word_map_entry()); 1098 for (const auto& entry : list_item.word_map_entry())
1164 for (RepeatedPtrField<WordMapEntry>::const_iterator iter = entries.begin(); 1099 word_map_[base::UTF8ToUTF16(entry.word())] = entry.word_id();
1165 iter != entries.end(); ++iter) 1100
1166 word_map_[base::UTF8ToUTF16(iter->word())] = iter->word_id();
1167 return true; 1101 return true;
1168 } 1102 }
1169 1103
1170 bool URLIndexPrivateData::RestoreCharWordMap( 1104 bool URLIndexPrivateData::RestoreCharWordMap(
1171 const InMemoryURLIndexCacheItem& cache) { 1105 const InMemoryURLIndexCacheItem& cache) {
1172 if (!cache.has_char_word_map()) 1106 if (!cache.has_char_word_map())
1173 return false; 1107 return false;
1174 const CharWordMapItem& list_item(cache.char_word_map()); 1108 const CharWordMapItem& list_item(cache.char_word_map());
1175 uint32_t expected_item_count = list_item.item_count(); 1109 uint32_t expected_item_count = list_item.item_count();
1176 uint32_t actual_item_count = list_item.char_word_map_entry_size(); 1110 uint32_t actual_item_count = list_item.char_word_map_entry_size();
1177 if (actual_item_count == 0 || actual_item_count != expected_item_count) 1111 if (actual_item_count == 0 || actual_item_count != expected_item_count)
1178 return false; 1112 return false;
1179 const RepeatedPtrField<CharWordMapEntry>& 1113
1180 entries(list_item.char_word_map_entry()); 1114 for (const auto& entry : list_item.char_word_map_entry()) {
1181 for (RepeatedPtrField<CharWordMapEntry>::const_iterator iter = 1115 expected_item_count = entry.item_count();
1182 entries.begin(); iter != entries.end(); ++iter) { 1116 actual_item_count = entry.word_id_size();
1183 expected_item_count = iter->item_count();
1184 actual_item_count = iter->word_id_size();
1185 if (actual_item_count == 0 || actual_item_count != expected_item_count) 1117 if (actual_item_count == 0 || actual_item_count != expected_item_count)
1186 return false; 1118 return false;
1187 base::char16 uni_char = static_cast<base::char16>(iter->char_16()); 1119 base::char16 uni_char = static_cast<base::char16>(entry.char_16());
1188 WordIDSet word_id_set; 1120 const RepeatedField<int32_t>& word_ids(entry.word_id());
1189 const RepeatedField<int32_t>& word_ids(iter->word_id()); 1121 char_word_map_[uni_char] = {word_ids.begin(), word_ids.end()};
1190 for (RepeatedField<int32_t>::const_iterator jiter = word_ids.begin();
1191 jiter != word_ids.end(); ++jiter)
1192 word_id_set.insert(*jiter);
1193 char_word_map_[uni_char] = word_id_set;
1194 } 1122 }
1195 return true; 1123 return true;
1196 } 1124 }
1197 1125
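RestoreCharWordMap() above builds each WordIDSet directly from the repeated field's iterator range with `= {begin, end}`; a std-only sketch of that construction (a std::vector stands in for the protobuf RepeatedField):

#include <map>
#include <set>
#include <vector>

// Assigning {begin, end} invokes the set's iterator-range constructor, so
// the whole entry is built in one call instead of a per-element insert loop.
void RestoreCharEntry(std::map<char16_t, std::set<int>>* char_word_map,
                      char16_t key,
                      const std::vector<int>& word_ids) {
  (*char_word_map)[key] = {word_ids.begin(), word_ids.end()};
}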
1198 bool URLIndexPrivateData::RestoreWordIDHistoryMap( 1126 bool URLIndexPrivateData::RestoreWordIDHistoryMap(
1199 const InMemoryURLIndexCacheItem& cache) { 1127 const InMemoryURLIndexCacheItem& cache) {
1200 if (!cache.has_word_id_history_map()) 1128 if (!cache.has_word_id_history_map())
1201 return false; 1129 return false;
1202 const WordIDHistoryMapItem& list_item(cache.word_id_history_map()); 1130 const WordIDHistoryMapItem& list_item(cache.word_id_history_map());
1203 uint32_t expected_item_count = list_item.item_count(); 1131 uint32_t expected_item_count = list_item.item_count();
1204 uint32_t actual_item_count = list_item.word_id_history_map_entry_size(); 1132 uint32_t actual_item_count = list_item.word_id_history_map_entry_size();
1205 if (actual_item_count == 0 || actual_item_count != expected_item_count) 1133 if (actual_item_count == 0 || actual_item_count != expected_item_count)
1206 return false; 1134 return false;
1207 const RepeatedPtrField<WordIDHistoryMapEntry>& 1135 for (const auto& entry : list_item.word_id_history_map_entry()) {
1208 entries(list_item.word_id_history_map_entry()); 1136 expected_item_count = entry.item_count();
1209 for (RepeatedPtrField<WordIDHistoryMapEntry>::const_iterator iter = 1137 actual_item_count = entry.history_id_size();
1210 entries.begin(); iter != entries.end(); ++iter) {
1211 expected_item_count = iter->item_count();
1212 actual_item_count = iter->history_id_size();
1213 if (actual_item_count == 0 || actual_item_count != expected_item_count) 1138 if (actual_item_count == 0 || actual_item_count != expected_item_count)
1214 return false; 1139 return false;
1215 WordID word_id = iter->word_id(); 1140 WordID word_id = entry.word_id();
1216 HistoryIDSet history_id_set; 1141 const RepeatedField<int64_t>& history_ids(entry.history_id());
1217 const RepeatedField<int64_t>& history_ids(iter->history_id()); 1142 word_id_history_map_[word_id] = {history_ids.begin(), history_ids.end()};
1218 for (RepeatedField<int64_t>::const_iterator jiter = history_ids.begin(); 1143 for (HistoryID history_id : history_ids)
1219 jiter != history_ids.end(); ++jiter) { 1144 history_id_word_map_[history_id].insert(word_id);
1220 history_id_set.insert(*jiter);
1221 AddToHistoryIDWordMap(*jiter, word_id);
1222 }
1223 word_id_history_map_[word_id] = history_id_set;
1224 } 1145 }
1225 return true; 1146 return true;
1226 } 1147 }
1227 1148
1228 bool URLIndexPrivateData::RestoreHistoryInfoMap( 1149 bool URLIndexPrivateData::RestoreHistoryInfoMap(
1229 const InMemoryURLIndexCacheItem& cache) { 1150 const InMemoryURLIndexCacheItem& cache) {
1230 if (!cache.has_history_info_map()) 1151 if (!cache.has_history_info_map())
1231 return false; 1152 return false;
1232 const HistoryInfoMapItem& list_item(cache.history_info_map()); 1153 const HistoryInfoMapItem& list_item(cache.history_info_map());
1233 uint32_t expected_item_count = list_item.item_count(); 1154 uint32_t expected_item_count = list_item.item_count();
1234 uint32_t actual_item_count = list_item.history_info_map_entry_size(); 1155 uint32_t actual_item_count = list_item.history_info_map_entry_size();
1235 if (actual_item_count == 0 || actual_item_count != expected_item_count) 1156 if (actual_item_count == 0 || actual_item_count != expected_item_count)
1236 return false; 1157 return false;
1237 const RepeatedPtrField<HistoryInfoMapEntry>& 1158
1238 entries(list_item.history_info_map_entry()); 1159 for (const auto& entry : list_item.history_info_map_entry()) {
1239 for (RepeatedPtrField<HistoryInfoMapEntry>::const_iterator iter = 1160 HistoryID history_id = entry.history_id();
1240 entries.begin(); iter != entries.end(); ++iter) { 1161 history::URLRow url_row(GURL(entry.url()), history_id);
1241 HistoryID history_id = iter->history_id(); 1162 url_row.set_visit_count(entry.visit_count());
1242 GURL url(iter->url()); 1163 url_row.set_typed_count(entry.typed_count());
1243 history::URLRow url_row(url, history_id); 1164 url_row.set_last_visit(base::Time::FromInternalValue(entry.last_visit()));
1244 url_row.set_visit_count(iter->visit_count()); 1165 if (entry.has_title())
1245 url_row.set_typed_count(iter->typed_count()); 1166 url_row.set_title(base::UTF8ToUTF16(entry.title()));
1246 url_row.set_last_visit(base::Time::FromInternalValue(iter->last_visit())); 1167 history_info_map_[history_id].url_row = std::move(url_row);
1247 if (iter->has_title()) {
1248 base::string16 title(base::UTF8ToUTF16(iter->title()));
1249 url_row.set_title(title);
1250 }
1251 history_info_map_[history_id].url_row = url_row;
1252 1168
1253 // Restore visits list. 1169 // Restore visits list.
1254 VisitInfoVector visits; 1170 VisitInfoVector visits;
1255 visits.reserve(iter->visits_size()); 1171 visits.reserve(entry.visits_size());
1256 for (int i = 0; i < iter->visits_size(); ++i) { 1172 for (const auto& entry_visit : entry.visits()) {
1257 visits.push_back(std::make_pair( 1173 visits.emplace_back(
1258 base::Time::FromInternalValue(iter->visits(i).visit_time()), 1174 base::Time::FromInternalValue(entry_visit.visit_time()),
1259 ui::PageTransitionFromInt(iter->visits(i).transition_type()))); 1175 ui::PageTransitionFromInt(entry_visit.transition_type()));
1260 } 1176 }
1261 history_info_map_[history_id].visits = visits; 1177 history_info_map_[history_id].visits = std::move(visits);
1262 } 1178 }
1263 return true; 1179 return true;
1264 } 1180 }
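RestoreHistoryInfoMap now constructs each history::URLRow directly from the cache entry, appends visits with emplace_back, and moves the finished containers into history_info_map_ rather than copying them. Below is a standard-library-only sketch of the build-then-move pattern, with simplified stand-ins for history::VisitInfo and the proto entry; the names are illustrative only.

#include <cstdint>
#include <map>
#include <utility>
#include <vector>

using Visit = std::pair<int64_t, int>;  // (visit time, transition type)

struct HistoryItem {
  std::vector<Visit> visits;
};

void RestoreVisits(int64_t history_id,
                   const std::vector<Visit>& cached_visits,
                   std::map<int64_t, HistoryItem>* history_info_map) {
  std::vector<Visit> visits;
  visits.reserve(cached_visits.size());  // reserve up front, as in the patch
  for (const auto& cached : cached_visits)
    visits.emplace_back(cached.first, cached.second);  // construct in place
  // Move the finished vector into the map entry instead of copying it.
  (*history_info_map)[history_id].visits = std::move(visits);
}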
1265 1181
1266 bool URLIndexPrivateData::RestoreWordStartsMap( 1182 bool URLIndexPrivateData::RestoreWordStartsMap(
1267 const InMemoryURLIndexCacheItem& cache) { 1183 const InMemoryURLIndexCacheItem& cache) {
1268 // Note that this function must be called after RestoreHistoryInfoMap() has 1184 // Note that this function must be called after RestoreHistoryInfoMap() has
1269 // been run as the word starts may have to be recalculated from the urls and 1185 // been run as the word starts may have to be recalculated from the urls and
1270 // page titles. 1186 // page titles.
1271 if (cache.has_word_starts_map()) { 1187 if (cache.has_word_starts_map()) {
1272 const WordStartsMapItem& list_item(cache.word_starts_map()); 1188 const WordStartsMapItem& list_item(cache.word_starts_map());
1273 uint32_t expected_item_count = list_item.item_count(); 1189 uint32_t expected_item_count = list_item.item_count();
1274 uint32_t actual_item_count = list_item.word_starts_map_entry_size(); 1190 uint32_t actual_item_count = list_item.word_starts_map_entry_size();
1275 if (actual_item_count == 0 || actual_item_count != expected_item_count) 1191 if (actual_item_count == 0 || actual_item_count != expected_item_count)
1276 return false; 1192 return false;
1277 const RepeatedPtrField<WordStartsMapEntry>& 1193 for (const auto& entry : list_item.word_starts_map_entry()) {
1278 entries(list_item.word_starts_map_entry()); 1194 HistoryID history_id = entry.history_id();
1279 for (RepeatedPtrField<WordStartsMapEntry>::const_iterator iter =
1280 entries.begin(); iter != entries.end(); ++iter) {
1281 HistoryID history_id = iter->history_id();
1282 RowWordStarts word_starts; 1195 RowWordStarts word_starts;
1283 // Restore the URL word starts. 1196 // Restore the URL word starts.
1284 const RepeatedField<int32_t>& url_starts(iter->url_word_starts()); 1197 const RepeatedField<int32_t>& url_starts(entry.url_word_starts());
1285 for (RepeatedField<int32_t>::const_iterator jiter = url_starts.begin(); 1198 word_starts.url_word_starts_ = {url_starts.begin(), url_starts.end()};
1286 jiter != url_starts.end(); ++jiter) 1199
1287 word_starts.url_word_starts_.push_back(*jiter);
1288 // Restore the page title word starts. 1200 // Restore the page title word starts.
1289 const RepeatedField<int32_t>& title_starts(iter->title_word_starts()); 1201 const RepeatedField<int32_t>& title_starts(entry.title_word_starts());
1290 for (RepeatedField<int32_t>::const_iterator jiter = title_starts.begin(); 1202 word_starts.title_word_starts_ = {title_starts.begin(),
1291 jiter != title_starts.end(); ++jiter) 1203 title_starts.end()};
1292 word_starts.title_word_starts_.push_back(*jiter); 1204
1293 word_starts_map_[history_id] = word_starts; 1205 word_starts_map_[history_id] = std::move(word_starts);
1294 } 1206 }
1295 } else { 1207 } else {
1296 // Since the cache did not contain any word starts we must rebuild them from 1208 // Since the cache did not contain any word starts we must rebuild them from
1297 // the URL and page titles. 1209 // the URL and page titles.
1298 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin(); 1210 for (const auto& entry : history_info_map_) {
1299 iter != history_info_map_.end(); ++iter) {
1300 RowWordStarts word_starts; 1211 RowWordStarts word_starts;
1301 const history::URLRow& row(iter->second.url_row); 1212 const history::URLRow& row(entry.second.url_row);
1302 const base::string16& url = 1213 const base::string16& url =
1303 bookmarks::CleanUpUrlForMatching(row.url(), nullptr); 1214 bookmarks::CleanUpUrlForMatching(row.url(), nullptr);
1304 String16VectorFromString16(url, false, &word_starts.url_word_starts_); 1215 String16VectorFromString16(url, false, &word_starts.url_word_starts_);
1305 const base::string16& title = 1216 const base::string16& title =
1306 bookmarks::CleanUpTitleForMatching(row.title()); 1217 bookmarks::CleanUpTitleForMatching(row.title());
1307 String16VectorFromString16(title, false, &word_starts.title_word_starts_); 1218 String16VectorFromString16(title, false, &word_starts.title_word_starts_);
1308 word_starts_map_[iter->first] = word_starts; 1219 word_starts_map_[entry.first] = std::move(word_starts);
1309 } 1220 }
1310 } 1221 }
1311 return true; 1222 return true;
1312 } 1223 }
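When the cache carries no word-starts section, the code above falls back to recomputing word starts from the cleaned-up URL and title via String16VectorFromString16(). As a rough illustration of what a word-start list contains, here is a hypothetical ASCII-only helper (not the real Chromium function) that records the offset of the first character of each alphanumeric run:

#include <cctype>
#include <cstddef>
#include <string>
#include <vector>

std::vector<size_t> ComputeWordStarts(const std::string& text) {
  std::vector<size_t> starts;
  bool in_word = false;
  for (size_t i = 0; i < text.size(); ++i) {
    const bool is_word_char =
        std::isalnum(static_cast<unsigned char>(text[i])) != 0;
    if (is_word_char && !in_word)
      starts.push_back(i);  // offset of the first character of a new word
    in_word = is_word_char;
  }
  return starts;
}

// Example: ComputeWordStarts("www.example.com/path") yields {0, 4, 12, 16}.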
1313 1224
1314 // static 1225 // static
1315 bool URLIndexPrivateData::URLSchemeIsWhitelisted( 1226 bool URLIndexPrivateData::URLSchemeIsWhitelisted(
1316 const GURL& gurl, 1227 const GURL& gurl,
1317 const std::set<std::string>& whitelist) { 1228 const std::set<std::string>& whitelist) {
1318 return whitelist.find(gurl.scheme()) != whitelist.end(); 1229 return whitelist.find(gurl.scheme()) != whitelist.end();
(...skipping 62 matching lines...)
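URLSchemeIsWhitelisted(), shown just before the skipped lines, is a plain set-membership test on the URL's scheme. A small usage sketch with an illustrative whitelist follows; the schemes listed are examples, not the actual set the index registers.

#include <iostream>
#include <set>
#include <string>

bool SchemeIsWhitelisted(const std::string& scheme,
                         const std::set<std::string>& whitelist) {
  return whitelist.find(scheme) != whitelist.end();
}

int main() {
  const std::set<std::string> whitelist = {"http", "https", "file"};
  std::cout << SchemeIsWhitelisted("https", whitelist) << "\n";       // 1
  std::cout << SchemeIsWhitelisted("javascript", whitelist) << "\n";  // 0
  return 0;
}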
1381 // First cut: typed count, visit count, recency. 1292 // First cut: typed count, visit count, recency.
1382 // TODO(mrossetti): This is too simplistic. Consider an approach which ranks 1293 // TODO(mrossetti): This is too simplistic. Consider an approach which ranks
1383 // recently visited (within the last 12/24 hours) as highly important. Get 1294 // recently visited (within the last 12/24 hours) as highly important. Get
1384 // input from mpearson. 1295 // input from mpearson.
1385 if (r1.typed_count() != r2.typed_count()) 1296 if (r1.typed_count() != r2.typed_count())
1386 return (r1.typed_count() > r2.typed_count()); 1297 return (r1.typed_count() > r2.typed_count());
1387 if (r1.visit_count() != r2.visit_count()) 1298 if (r1.visit_count() != r2.visit_count())
1388 return (r1.visit_count() > r2.visit_count()); 1299 return (r1.visit_count() > r2.visit_count());
1389 return (r1.last_visit() > r2.last_visit()); 1300 return (r1.last_visit() > r2.last_visit());
1390 } 1301 }
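The comparator above orders rows by typed count, then visit count, then recency. A comparator of this shape is typically used as a strict-weak-ordering predicate for std::sort or std::partial_sort; here is a self-contained sketch of that usage, with a simplified Row standing in for history::URLRow.

#include <algorithm>
#include <cstdint>
#include <vector>

struct Row {
  int typed_count;
  int visit_count;
  int64_t last_visit;  // larger value means more recent
};

// Same tie-breaking order as the comparator in the diff: typed count,
// then visit count, then recency.
bool RowFactorGreater(const Row& r1, const Row& r2) {
  if (r1.typed_count != r2.typed_count)
    return r1.typed_count > r2.typed_count;
  if (r1.visit_count != r2.visit_count)
    return r1.visit_count > r2.visit_count;
  return r1.last_visit > r2.last_visit;
}

void SortMostImportantFirst(std::vector<Row>* rows) {
  // After sorting, the highest-ranked rows are at the front.
  std::sort(rows->begin(), rows->end(), RowFactorGreater);
}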