Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(765)

Side by Side Diff: components/omnibox/browser/url_index_private_data.cc

Issue 2690303012: Cleaning up url_index_private_data and in_memory_url_index_types. (Closed)
Patch Set: Review, round 1. Created 3 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « components/omnibox/browser/url_index_private_data.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/omnibox/browser/url_index_private_data.h" 5 #include "components/omnibox/browser/url_index_private_data.h"
6 6
7 #include <stdint.h> 7 #include <stdint.h>
8 8
9 #include <functional> 9 #include <functional>
10 #include <iterator> 10 #include <iterator>
(...skipping 191 matching lines...) Expand 10 before | Expand all | Expand 10 after
202 if (lower_words.empty()) 202 if (lower_words.empty())
203 continue; 203 continue;
204 HistoryIDSet history_id_set = HistoryIDSetFromWords(lower_words); 204 HistoryIDSet history_id_set = HistoryIDSetFromWords(lower_words);
205 pre_filter_item_count_ += history_id_set.size(); 205 pre_filter_item_count_ += history_id_set.size();
206 // Trim the candidate pool if it is large. Note that we do not filter out 206 // Trim the candidate pool if it is large. Note that we do not filter out
207 // items that do not contain the search terms as proper substrings -- 207 // items that do not contain the search terms as proper substrings --
208 // doing so is the performance-costly operation we are trying to avoid in 208 // doing so is the performance-costly operation we are trying to avoid in
209 // order to maintain omnibox responsiveness. 209 // order to maintain omnibox responsiveness.
210 const size_t kItemsToScoreLimit = 500; 210 const size_t kItemsToScoreLimit = 500;
211 if (history_id_set.size() > kItemsToScoreLimit) { 211 if (history_id_set.size() > kItemsToScoreLimit) {
212 HistoryIDVector history_ids; 212 HistoryIDVector history_ids(history_id_set.begin(), history_id_set.end());
213 std::copy(history_id_set.begin(), history_id_set.end(), 213
214 std::back_inserter(history_ids));
215 // Trim down the set by sorting by typed-count, visit-count, and last 214 // Trim down the set by sorting by typed-count, visit-count, and last
216 // visit. 215 // visit.
217 HistoryItemFactorGreater item_factor_functor(history_info_map_); 216 HistoryItemFactorGreater item_factor_functor(history_info_map_);
218 std::partial_sort(history_ids.begin(), 217 std::nth_element(history_ids.begin(),
219 history_ids.begin() + kItemsToScoreLimit, 218 history_ids.begin() + kItemsToScoreLimit,
220 history_ids.end(), item_factor_functor); 219 history_ids.end(), item_factor_functor);
221 history_id_set.clear(); 220 history_id_set = {history_ids.begin(),
222 std::copy(history_ids.begin(), history_ids.begin() + kItemsToScoreLimit, 221 history_ids.begin() + kItemsToScoreLimit};
223 std::inserter(history_id_set, history_id_set.end()));
224 post_filter_item_count_ += history_id_set.size(); 222 post_filter_item_count_ += history_id_set.size();
225 } else { 223 } else {
226 post_filter_item_count_ += pre_filter_item_count_; 224 post_filter_item_count_ += pre_filter_item_count_;
227 } 225 }
228 ScoredHistoryMatches temp_scored_items; 226 ScoredHistoryMatches temp_scored_items;
229 HistoryIdSetToScoredMatches(history_id_set, lower_raw_string, 227 HistoryIdSetToScoredMatches(history_id_set, lower_raw_string,
230 template_url_service, bookmark_model, 228 template_url_service, bookmark_model,
231 &temp_scored_items); 229 &temp_scored_items);
232 scored_items.insert(scored_items.end(), temp_scored_items.begin(), 230 scored_items.insert(scored_items.end(), temp_scored_items.begin(),
233 temp_scored_items.end()); 231 temp_scored_items.end());
(...skipping 243 matching lines...) Expand 10 before | Expand all | Expand 10 after
477 // post_scoring_item_count_ 475 // post_scoring_item_count_
478 } 476 }
479 477
480 bool URLIndexPrivateData::Empty() const { 478 bool URLIndexPrivateData::Empty() const {
481 return history_info_map_.empty(); 479 return history_info_map_.empty();
482 } 480 }
483 481
484 void URLIndexPrivateData::Clear() { 482 void URLIndexPrivateData::Clear() {
485 last_time_rebuilt_from_history_ = base::Time(); 483 last_time_rebuilt_from_history_ = base::Time();
486 word_list_.clear(); 484 word_list_.clear();
487 available_words_.clear(); 485 available_words_ = {};
488 word_map_.clear(); 486 word_map_.clear();
489 char_word_map_.clear(); 487 char_word_map_.clear();
490 word_id_history_map_.clear(); 488 word_id_history_map_.clear();
491 history_id_word_map_.clear(); 489 history_id_word_map_.clear();
492 history_info_map_.clear(); 490 history_info_map_.clear();
493 word_starts_map_.clear(); 491 word_starts_map_.clear();
494 } 492 }
495 493
496 URLIndexPrivateData::~URLIndexPrivateData() {} 494 URLIndexPrivateData::~URLIndexPrivateData() {}
497 495
498 HistoryIDSet URLIndexPrivateData::HistoryIDSetFromWords( 496 HistoryIDSet URLIndexPrivateData::HistoryIDSetFromWords(
499 const String16Vector& unsorted_words) { 497 const String16Vector& unsorted_words) {
500 SCOPED_UMA_HISTOGRAM_TIMER("Omnibox.HistoryQuickHistoryIDSetFromWords"); 498 SCOPED_UMA_HISTOGRAM_TIMER("Omnibox.HistoryQuickHistoryIDSetFromWords");
501 // Break the terms down into individual terms (words), get the candidate 499 // Break the terms down into individual terms (words), get the candidate
502 // set for each term, and intersect each to get a final candidate list. 500 // set for each term, and intersect each to get a final candidate list.
503 // Note that a single 'term' from the user's perspective might be 501 // Note that a single 'term' from the user's perspective might be
504 // a string like "http://www.somewebsite.com" which, from our perspective, 502 // a string like "http://www.somewebsite.com" which, from our perspective,
505 // is four words: 'http', 'www', 'somewebsite', and 'com'. 503 // is four words: 'http', 'www', 'somewebsite', and 'com'.
506 HistoryIDSet history_id_set; 504 HistoryIDSet history_id_set;
507 String16Vector words(unsorted_words); 505 String16Vector words(unsorted_words);
508 // Sort the words into the longest first as such are likely to narrow down 506 // Sort the words into the longest first as such are likely to narrow down
509 // the results quicker. Also, single character words are the most expensive 507 // the results quicker. Also, single character words are the most expensive
510 // to process so save them for last. 508 // to process so save them for last.
511 std::sort(words.begin(), words.end(), LengthGreater); 509 std::sort(words.begin(), words.end(), LengthGreater);
512 for (String16Vector::iterator iter = words.begin(); iter != words.end(); 510 for (String16Vector::iterator iter = words.begin(); iter != words.end();
dyaroshev 2017/02/17 20:44:38 set_intersection.
513 ++iter) { 511 ++iter) {
514 base::string16 uni_word = *iter; 512 HistoryIDSet term_history_set = HistoryIDsForTerm(*iter);
515 HistoryIDSet term_history_set = HistoryIDsForTerm(uni_word); 513 if (term_history_set.empty())
516 if (term_history_set.empty()) { 514 return HistoryIDSet();
517 history_id_set.clear(); 515
518 break; 516 history_id_set = (iter == words.begin())
519 } 517 ? std::move(term_history_set)
520 if (iter == words.begin()) { 518 : base::STLSetIntersection<HistoryIDSet>(
521 history_id_set.swap(term_history_set); 519 history_id_set, term_history_set);
522 } else {
523 HistoryIDSet new_history_id_set = base::STLSetIntersection<HistoryIDSet>(
524 history_id_set, term_history_set);
525 history_id_set.swap(new_history_id_set);
526 }
527 } 520 }
528 return history_id_set; 521 return history_id_set;
529 } 522 }
530 523
531 HistoryIDSet URLIndexPrivateData::HistoryIDsForTerm( 524 HistoryIDSet URLIndexPrivateData::HistoryIDsForTerm(
532 const base::string16& term) { 525 const base::string16& term) {
533 if (term.empty()) 526 if (term.empty())
534 return HistoryIDSet(); 527 return HistoryIDSet();
535 528
536 // TODO(mrossetti): Consider optimizing for very common terms such as 529 // TODO(mrossetti): Consider optimizing for very common terms such as
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after
585 578
586 // Reduce the word set with any leftover, unprocessed characters. 579 // Reduce the word set with any leftover, unprocessed characters.
587 if (!unique_chars.empty()) { 580 if (!unique_chars.empty()) {
588 WordIDSet leftover_set(WordIDSetForTermChars(unique_chars)); 581 WordIDSet leftover_set(WordIDSetForTermChars(unique_chars));
589 // We might come up empty on the leftovers. 582 // We might come up empty on the leftovers.
590 if (leftover_set.empty()) { 583 if (leftover_set.empty()) {
591 search_term_cache_[term] = SearchTermCacheItem(); 584 search_term_cache_[term] = SearchTermCacheItem();
592 return HistoryIDSet(); 585 return HistoryIDSet();
593 } 586 }
594 // Or there may not have been a prefix from which to start. 587 // Or there may not have been a prefix from which to start.
595 if (prefix_chars.empty()) { 588 word_id_set = prefix_chars.empty() ? std::move(leftover_set)
596 word_id_set.swap(leftover_set); 589 : base::STLSetIntersection<WordIDSet>(
597 } else { 590 word_id_set, leftover_set);
598 WordIDSet new_word_id_set = base::STLSetIntersection<WordIDSet>(
599 word_id_set, leftover_set);
600 word_id_set.swap(new_word_id_set);
601 }
602 } 591 }
603 592
604 // We must filter the word list because the resulting word set surely 593 // We must filter the word list because the resulting word set surely
605 // contains words which do not have the search term as a proper subset. 594 // contains words which do not have the search term as a proper subset.
606 for (WordIDSet::iterator word_set_iter = word_id_set.begin(); 595 for (WordIDSet::iterator word_set_iter = word_id_set.begin();
dyaroshev 2017/02/17 20:44:38 erase_if
Peter Kasting 2017/02/18 00:40:40 How come you ended up reverting the changes here?
dyaroshev 2017/02/18 00:57:36 I reverted flat_set to std::set, which has const k
Peter Kasting 2017/02/18 01:46:30 Seems like the issue is that std::set is an inhere
607 word_set_iter != word_id_set.end(); ) { 596 word_set_iter != word_id_set.end(); ) {
608 if (word_list_[*word_set_iter].find(term) == base::string16::npos) 597 if (word_list_[*word_set_iter].find(term) == base::string16::npos)
609 word_set_iter = word_id_set.erase(word_set_iter); 598 word_set_iter = word_id_set.erase(word_set_iter);
610 else 599 else
611 ++word_set_iter; 600 ++word_set_iter;
612 } 601 }
613 } else { 602 } else {
614 word_id_set = WordIDSetForTermChars(Char16SetFromString16(term)); 603 word_id_set = WordIDSetForTermChars(Char16SetFromString16(term));
615 } 604 }
616 605
617 // If any words resulted then we can compose a set of history IDs by unioning 606 // If any words resulted then we can compose a set of history IDs by unioning
618 // the sets from each word. 607 // the sets from each word.
Peter Kasting 2017/02/18 00:40:40 And here?
dyaroshev 2017/02/18 00:57:36 Putting in a vector and then inserting makes sense
Peter Kasting 2017/02/18 01:46:30 Right, but if it's perf-neutral on std::set and pe
619 HistoryIDSet history_id_set; 608 HistoryIDSet history_id_set;
620 if (!word_id_set.empty()) { 609 if (!word_id_set.empty()) {
621 for (WordIDSet::iterator word_id_iter = word_id_set.begin(); 610 for (WordID word_id : word_id_set) {
622 word_id_iter != word_id_set.end(); ++word_id_iter) {
623 WordID word_id = *word_id_iter;
624 WordIDHistoryMap::iterator word_iter = word_id_history_map_.find(word_id); 611 WordIDHistoryMap::iterator word_iter = word_id_history_map_.find(word_id);
625 if (word_iter != word_id_history_map_.end()) { 612 if (word_iter != word_id_history_map_.end()) {
626 HistoryIDSet& word_history_id_set(word_iter->second); 613 HistoryIDSet& word_history_id_set(word_iter->second);
627 history_id_set.insert(word_history_id_set.begin(), 614 history_id_set.insert(word_history_id_set.begin(),
628 word_history_id_set.end()); 615 word_history_id_set.end());
629 } 616 }
630 } 617 }
631 } 618 }
632 619
633 // Record a new cache entry for this word if the term is longer than 620 // Record a new cache entry for this word if the term is longer than
634 // a single character. 621 // a single character.
635 if (term_length > 1) 622 if (term_length > 1)
636 search_term_cache_[term] = SearchTermCacheItem(word_id_set, history_id_set); 623 search_term_cache_[term] = SearchTermCacheItem(word_id_set, history_id_set);
637 624
638 return history_id_set; 625 return history_id_set;
639 } 626 }
640 627
641 WordIDSet URLIndexPrivateData::WordIDSetForTermChars( 628 WordIDSet URLIndexPrivateData::WordIDSetForTermChars(
642 const Char16Set& term_chars) { 629 const Char16Set& term_chars) {
643 WordIDSet word_id_set; 630 WordIDSet word_id_set;
631
644 for (Char16Set::const_iterator c_iter = term_chars.begin(); 632 for (Char16Set::const_iterator c_iter = term_chars.begin();
dyaroshev 2017/02/17 20:44:37 set_intersection.
645 c_iter != term_chars.end(); ++c_iter) { 633 c_iter != term_chars.end(); ++c_iter) {
646 CharWordIDMap::iterator char_iter = char_word_map_.find(*c_iter); 634 CharWordIDMap::iterator char_iter = char_word_map_.find(*c_iter);
647 if (char_iter == char_word_map_.end()) { 635 if (char_iter == char_word_map_.end()) {
648 // A character was not found so there are no matching results: bail. 636 // A character was not found so there are no matching results: bail.
649 word_id_set.clear(); 637 word_id_set.clear();
650 break; 638 break;
651 } 639 }
652 WordIDSet& char_word_id_set(char_iter->second); 640 WordIDSet& char_word_id_set(char_iter->second);
653 // It is possible for there to no longer be any words associated with 641 // It is possible for there to no longer be any words associated with
654 // a particular character. Give up in that case. 642 // a particular character. Give up in that case.
655 if (char_word_id_set.empty()) { 643 if (char_word_id_set.empty()) {
656 word_id_set.clear(); 644 word_id_set.clear();
657 break; 645 break;
658 } 646 }
659 647
660 if (c_iter == term_chars.begin()) { 648 if (c_iter == term_chars.begin()) {
661 // First character results becomes base set of results. 649 // First character results becomes base set of results.
662 word_id_set = char_word_id_set; 650 word_id_set = char_word_id_set;
663 } else { 651 } else {
664 // Subsequent character results get intersected in. 652 // Subsequent character results get intersected in.
665 WordIDSet new_word_id_set = base::STLSetIntersection<WordIDSet>( 653 word_id_set =
666 word_id_set, char_word_id_set); 654 base::STLSetIntersection<WordIDSet>(word_id_set, char_word_id_set);
667 word_id_set.swap(new_word_id_set);
668 } 655 }
669 } 656 }
657
670 return word_id_set; 658 return word_id_set;
671 } 659 }
672 660
673 void URLIndexPrivateData::HistoryIdSetToScoredMatches( 661 void URLIndexPrivateData::HistoryIdSetToScoredMatches(
674 HistoryIDSet history_id_set, 662 HistoryIDSet history_id_set,
675 const base::string16& lower_raw_string, 663 const base::string16& lower_raw_string,
676 const TemplateURLService* template_url_service, 664 const TemplateURLService* template_url_service,
677 bookmarks::BookmarkModel* bookmark_model, 665 bookmarks::BookmarkModel* bookmark_model,
678 ScoredHistoryMatches* scored_items) const { 666 ScoredHistoryMatches* scored_items) const {
679 if (history_id_set.empty()) 667 if (history_id_set.empty())
(...skipping 17 matching lines...) Expand all
697 // are some form of whitespace), but this is such a rare edge case that it's 685 // are some form of whitespace), but this is such a rare edge case that it's
698 // not worth the time. 686 // not worth the time.
699 if (lower_raw_terms.empty()) 687 if (lower_raw_terms.empty())
700 return; 688 return;
701 689
702 WordStarts lower_terms_to_word_starts_offsets; 690 WordStarts lower_terms_to_word_starts_offsets;
703 CalculateWordStartsOffsets(lower_raw_terms, 691 CalculateWordStartsOffsets(lower_raw_terms,
704 &lower_terms_to_word_starts_offsets); 692 &lower_terms_to_word_starts_offsets);
705 693
706 // Filter bad matches and other matches we don't want to display. 694 // Filter bad matches and other matches we don't want to display.
707 for (auto it = history_id_set.begin();;) { 695 for (auto it = history_id_set.begin();;) {
Peter Kasting 2017/02/18 00:40:40 And here?
dyaroshev 2017/02/18 00:57:36 Same story with remove_if.
708 it = std::find_if(it, history_id_set.end(), 696 it = std::find_if(it, history_id_set.end(),
709 [this, template_url_service](const HistoryID history_id) { 697 [this, template_url_service](const HistoryID history_id) {
710 return ShouldFilter(history_id, template_url_service); 698 return ShouldFilter(history_id, template_url_service);
711 }); 699 });
712 if (it == history_id_set.end()) 700 if (it == history_id_set.end())
713 break; 701 break;
714 it = history_id_set.erase(it); 702 it = history_id_set.erase(it);
715 } 703 }
716 704
717 // Score the matches. 705 // Score the matches.
(...skipping 105 matching lines...) Expand 10 before | Expand all | Expand 10 after
823 HistoryID history_id = static_cast<HistoryID>(row.id()); 811 HistoryID history_id = static_cast<HistoryID>(row.id());
824 // Split URL into individual, unique words then add in the title words. 812 // Split URL into individual, unique words then add in the title words.
825 const GURL& gurl(row.url()); 813 const GURL& gurl(row.url());
826 const base::string16& url = 814 const base::string16& url =
827 bookmarks::CleanUpUrlForMatching(gurl, nullptr); 815 bookmarks::CleanUpUrlForMatching(gurl, nullptr);
828 String16Set url_words = String16SetFromString16(url, 816 String16Set url_words = String16SetFromString16(url,
829 word_starts ? &word_starts->url_word_starts_ : nullptr); 817 word_starts ? &word_starts->url_word_starts_ : nullptr);
830 const base::string16& title = bookmarks::CleanUpTitleForMatching(row.title()); 818 const base::string16& title = bookmarks::CleanUpTitleForMatching(row.title());
831 String16Set title_words = String16SetFromString16(title, 819 String16Set title_words = String16SetFromString16(title,
832 word_starts ? &word_starts->title_word_starts_ : nullptr); 820 word_starts ? &word_starts->title_word_starts_ : nullptr);
833 String16Set words = base::STLSetUnion<String16Set>(url_words, title_words); 821 for (const auto& word :
834 for (String16Set::iterator word_iter = words.begin(); 822 base::STLSetUnion<String16Set>(url_words, title_words))
835 word_iter != words.end(); ++word_iter) 823 AddWordToIndex(word, history_id);
836 AddWordToIndex(*word_iter, history_id);
837 824
838 search_term_cache_.clear(); // Invalidate the term cache. 825 search_term_cache_.clear(); // Invalidate the term cache.
839 } 826 }
840 827
841 void URLIndexPrivateData::AddWordToIndex(const base::string16& term, 828 void URLIndexPrivateData::AddWordToIndex(const base::string16& term,
842 HistoryID history_id) { 829 HistoryID history_id) {
843 WordMap::iterator word_pos = word_map_.find(term); 830 WordMap::iterator word_pos = word_map_.lower_bound(term);
844 if (word_pos != word_map_.end()) 831
845 UpdateWordHistory(word_pos->second, history_id); 832 // Adding a new word (i.e. a word that is not already in the word index).
846 else 833 if (word_pos->first != term) {
847 AddWordHistory(term, history_id); 834 word_pos =
835 word_map_.emplace_hint(word_pos, term, AddNewWordToWordList(term));
836
837 // For each character in the newly added word add the word to the character
838 // index.
839 for (base::char16 uni_char : Char16SetFromString16(term))
dyaroshev 2017/02/17 20:44:38 parallel_set.
840 char_word_map_[uni_char].insert(word_pos->second);
841 }
842
843 DCHECK_EQ(word_pos->first, term);
844
845 word_id_history_map_[word_pos->second].insert(history_id);
Peter Kasting 2017/02/18 00:40:40 This looks functionally different than the old cod
dyaroshev 2017/02/18 00:57:36 No - previous code reset the set only for new entrie
846 history_id_word_map_[history_id].insert(word_pos->second);
848 } 847 }
849 848
850 void URLIndexPrivateData::AddWordHistory(const base::string16& term, 849 WordID URLIndexPrivateData::AddNewWordToWordList(const base::string16& term) {
851 HistoryID history_id) {
852 WordID word_id = word_list_.size(); 850 WordID word_id = word_list_.size();
853 if (available_words_.empty()) { 851 if (available_words_.empty()) {
854 word_list_.push_back(term); 852 word_list_.push_back(term);
855 } else { 853 return word_id;
856 word_id = *(available_words_.begin());
857 word_list_[word_id] = term;
858 available_words_.erase(word_id);
859 } 854 }
860 word_map_[term] = word_id;
861 855
862 HistoryIDSet history_id_set; 856 word_id = available_words_.top();
863 history_id_set.insert(history_id); 857 available_words_.pop();
Peter Kasting 2017/02/18 00:40:40 Doesn't this need to actually place |term| in the
dyaroshev 2017/02/18 00:57:36 Yes. Tests that I ran didn't catch it. Sorry.
864 word_id_history_map_[word_id] = history_id_set; 858 return word_id;
865 AddToHistoryIDWordMap(history_id, word_id);
866
867 // For each character in the newly added word (i.e. a word that is not
868 // already in the word index), add the word to the character index.
869 Char16Set characters = Char16SetFromString16(term);
870 for (Char16Set::iterator uni_char_iter = characters.begin();
871 uni_char_iter != characters.end(); ++uni_char_iter) {
872 base::char16 uni_char = *uni_char_iter;
873 CharWordIDMap::iterator char_iter = char_word_map_.find(uni_char);
874 if (char_iter != char_word_map_.end()) {
875 // Update existing entry in the char/word index.
876 WordIDSet& word_id_set(char_iter->second);
877 word_id_set.insert(word_id);
878 } else {
879 // Create a new entry in the char/word index.
880 WordIDSet word_id_set;
881 word_id_set.insert(word_id);
882 char_word_map_[uni_char] = word_id_set;
883 }
884 }
885 }
886
887 void URLIndexPrivateData::UpdateWordHistory(WordID word_id,
888 HistoryID history_id) {
889 WordIDHistoryMap::iterator history_pos = word_id_history_map_.find(word_id);
890 DCHECK(history_pos != word_id_history_map_.end());
891 HistoryIDSet& history_id_set(history_pos->second);
892 history_id_set.insert(history_id);
893 AddToHistoryIDWordMap(history_id, word_id);
894 }
895
896 void URLIndexPrivateData::AddToHistoryIDWordMap(HistoryID history_id,
897 WordID word_id) {
898 HistoryIDWordMap::iterator iter = history_id_word_map_.find(history_id);
899 if (iter != history_id_word_map_.end()) {
900 WordIDSet& word_id_set(iter->second);
901 word_id_set.insert(word_id);
902 } else {
903 WordIDSet word_id_set;
904 word_id_set.insert(word_id);
905 history_id_word_map_[history_id] = word_id_set;
906 }
907 } 859 }
908 860
909 void URLIndexPrivateData::RemoveRowFromIndex(const history::URLRow& row) { 861 void URLIndexPrivateData::RemoveRowFromIndex(const history::URLRow& row) {
910 RemoveRowWordsFromIndex(row); 862 RemoveRowWordsFromIndex(row);
911 HistoryID history_id = static_cast<HistoryID>(row.id()); 863 HistoryID history_id = static_cast<HistoryID>(row.id());
912 history_info_map_.erase(history_id); 864 history_info_map_.erase(history_id);
913 word_starts_map_.erase(history_id); 865 word_starts_map_.erase(history_id);
914 } 866 }
915 867
916 void URLIndexPrivateData::RemoveRowWordsFromIndex(const history::URLRow& row) { 868 void URLIndexPrivateData::RemoveRowWordsFromIndex(const history::URLRow& row) {
917 // Remove the entries in history_id_word_map_ and word_id_history_map_ for 869 // Remove the entries in history_id_word_map_ and word_id_history_map_ for
918 // this row. 870 // this row.
919 HistoryID history_id = static_cast<HistoryID>(row.id()); 871 HistoryID history_id = static_cast<HistoryID>(row.id());
920 WordIDSet word_id_set = history_id_word_map_[history_id]; 872 WordIDSet word_id_set = history_id_word_map_[history_id];
921 history_id_word_map_.erase(history_id); 873 history_id_word_map_.erase(history_id);
922 874
923 // Reconcile any changes to word usage. 875 // Reconcile any changes to word usage.
924 for (WordIDSet::iterator word_id_iter = word_id_set.begin(); 876 for (WordID word_id : word_id_set) {
925 word_id_iter != word_id_set.end(); ++word_id_iter) { 877 auto word_id_history_map_iter = word_id_history_map_.find(word_id);
926 WordID word_id = *word_id_iter; 878 DCHECK(word_id_history_map_iter != word_id_history_map_.end());
927 word_id_history_map_[word_id].erase(history_id); 879
928 if (!word_id_history_map_[word_id].empty()) 880 word_id_history_map_iter->second.erase(history_id);
929 continue; // The word is still in use. 881 if (!word_id_history_map_iter->second.empty())
882 continue;
930 883
931 // The word is no longer in use. Reconcile any changes to character usage. 884 // The word is no longer in use. Reconcile any changes to character usage.
932 base::string16 word = word_list_[word_id]; 885 base::string16 word = word_list_[word_id];
933 Char16Set characters = Char16SetFromString16(word); 886 for (base::char16 uni_char : Char16SetFromString16(word)) {
dyaroshev 2017/02/17 20:44:38 parallel_set.
934 for (Char16Set::iterator uni_char_iter = characters.begin(); 887 auto char_word_map_iter = char_word_map_.find(uni_char);
935 uni_char_iter != characters.end(); ++uni_char_iter) { 888 char_word_map_iter->second.erase(word_id);
936 base::char16 uni_char = *uni_char_iter; 889 if (char_word_map_iter->second.empty())
937 char_word_map_[uni_char].erase(word_id); 890 char_word_map_.erase(char_word_map_iter);
938 if (char_word_map_[uni_char].empty())
939 char_word_map_.erase(uni_char); // No longer in use.
940 } 891 }
941 892
942 // Complete the removal of references to the word. 893 // Complete the removal of references to the word.
943 word_id_history_map_.erase(word_id); 894 word_id_history_map_.erase(word_id_history_map_iter);
944 word_map_.erase(word); 895 word_map_.erase(word);
945 word_list_[word_id] = base::string16(); 896 word_list_[word_id] = base::string16();
946 available_words_.insert(word_id); 897 available_words_.push(word_id);
947 } 898 }
948 } 899 }
949 900
950 void URLIndexPrivateData::ResetSearchTermCache() { 901 void URLIndexPrivateData::ResetSearchTermCache() {
951 for (SearchTermCacheMap::iterator iter = search_term_cache_.begin(); 902 for (auto& item : search_term_cache_)
952 iter != search_term_cache_.end(); ++iter) 903 item.second.used_ = false;
953 iter->second.used_ = false;
954 } 904 }
955 905
956 bool URLIndexPrivateData::SaveToFile(const base::FilePath& file_path) { 906 bool URLIndexPrivateData::SaveToFile(const base::FilePath& file_path) {
957 base::TimeTicks beginning_time = base::TimeTicks::Now(); 907 base::TimeTicks beginning_time = base::TimeTicks::Now();
958 InMemoryURLIndexCacheItem index_cache; 908 InMemoryURLIndexCacheItem index_cache;
959 SavePrivateData(&index_cache); 909 SavePrivateData(&index_cache);
960 std::string data; 910 std::string data;
961 if (!index_cache.SerializeToString(&data)) { 911 if (!index_cache.SerializeToString(&data)) {
962 LOG(WARNING) << "Failed to serialize the InMemoryURLIndex cache."; 912 LOG(WARNING) << "Failed to serialize the InMemoryURLIndex cache.";
963 return false; 913 return false;
(...skipping 24 matching lines...) Expand all
988 SaveWordIDHistoryMap(cache); 938 SaveWordIDHistoryMap(cache);
989 SaveHistoryInfoMap(cache); 939 SaveHistoryInfoMap(cache);
990 SaveWordStartsMap(cache); 940 SaveWordStartsMap(cache);
991 } 941 }
992 942
993 void URLIndexPrivateData::SaveWordList(InMemoryURLIndexCacheItem* cache) const { 943 void URLIndexPrivateData::SaveWordList(InMemoryURLIndexCacheItem* cache) const {
994 if (word_list_.empty()) 944 if (word_list_.empty())
995 return; 945 return;
996 WordListItem* list_item = cache->mutable_word_list(); 946 WordListItem* list_item = cache->mutable_word_list();
997 list_item->set_word_count(word_list_.size()); 947 list_item->set_word_count(word_list_.size());
998 for (String16Vector::const_iterator iter = word_list_.begin(); 948 for (const base::string16& word : word_list_)
999 iter != word_list_.end(); ++iter) 949 list_item->add_word(base::UTF16ToUTF8(word));
1000 list_item->add_word(base::UTF16ToUTF8(*iter));
1001 } 950 }
1002 951
1003 void URLIndexPrivateData::SaveWordMap(InMemoryURLIndexCacheItem* cache) const { 952 void URLIndexPrivateData::SaveWordMap(InMemoryURLIndexCacheItem* cache) const {
1004 if (word_map_.empty()) 953 if (word_map_.empty())
1005 return; 954 return;
1006 WordMapItem* map_item = cache->mutable_word_map(); 955 WordMapItem* map_item = cache->mutable_word_map();
1007 map_item->set_item_count(word_map_.size()); 956 map_item->set_item_count(word_map_.size());
1008 for (WordMap::const_iterator iter = word_map_.begin(); 957 for (const auto& elem : word_map_) {
1009 iter != word_map_.end(); ++iter) {
1010 WordMapEntry* map_entry = map_item->add_word_map_entry(); 958 WordMapEntry* map_entry = map_item->add_word_map_entry();
1011 map_entry->set_word(base::UTF16ToUTF8(iter->first)); 959 map_entry->set_word(base::UTF16ToUTF8(elem.first));
1012 map_entry->set_word_id(iter->second); 960 map_entry->set_word_id(elem.second);
1013 } 961 }
1014 } 962 }
1015 963
1016 void URLIndexPrivateData::SaveCharWordMap( 964 void URLIndexPrivateData::SaveCharWordMap(
1017 InMemoryURLIndexCacheItem* cache) const { 965 InMemoryURLIndexCacheItem* cache) const {
1018 if (char_word_map_.empty()) 966 if (char_word_map_.empty())
1019 return; 967 return;
1020 CharWordMapItem* map_item = cache->mutable_char_word_map(); 968 CharWordMapItem* map_item = cache->mutable_char_word_map();
1021 map_item->set_item_count(char_word_map_.size()); 969 map_item->set_item_count(char_word_map_.size());
1022 for (CharWordIDMap::const_iterator iter = char_word_map_.begin(); 970 for (const auto& entry : char_word_map_) {
1023 iter != char_word_map_.end(); ++iter) {
1024 CharWordMapEntry* map_entry = map_item->add_char_word_map_entry(); 971 CharWordMapEntry* map_entry = map_item->add_char_word_map_entry();
1025 map_entry->set_char_16(iter->first); 972 map_entry->set_char_16(entry.first);
1026 const WordIDSet& word_id_set(iter->second); 973 const WordIDSet& word_id_set(entry.second);
1027 map_entry->set_item_count(word_id_set.size()); 974 map_entry->set_item_count(word_id_set.size());
1028 for (WordIDSet::const_iterator set_iter = word_id_set.begin(); 975 for (WordID word_id : word_id_set)
1029 set_iter != word_id_set.end(); ++set_iter) 976 map_entry->add_word_id(word_id);
1030 map_entry->add_word_id(*set_iter);
1031 } 977 }
1032 } 978 }
1033 979
1034 void URLIndexPrivateData::SaveWordIDHistoryMap( 980 void URLIndexPrivateData::SaveWordIDHistoryMap(
1035 InMemoryURLIndexCacheItem* cache) const { 981 InMemoryURLIndexCacheItem* cache) const {
1036 if (word_id_history_map_.empty()) 982 if (word_id_history_map_.empty())
1037 return; 983 return;
1038 WordIDHistoryMapItem* map_item = cache->mutable_word_id_history_map(); 984 WordIDHistoryMapItem* map_item = cache->mutable_word_id_history_map();
1039 map_item->set_item_count(word_id_history_map_.size()); 985 map_item->set_item_count(word_id_history_map_.size());
1040 for (WordIDHistoryMap::const_iterator iter = word_id_history_map_.begin(); 986 for (const auto& entry : word_id_history_map_) {
1041 iter != word_id_history_map_.end(); ++iter) {
1042 WordIDHistoryMapEntry* map_entry = 987 WordIDHistoryMapEntry* map_entry =
1043 map_item->add_word_id_history_map_entry(); 988 map_item->add_word_id_history_map_entry();
1044 map_entry->set_word_id(iter->first); 989 map_entry->set_word_id(entry.first);
1045 const HistoryIDSet& history_id_set(iter->second); 990 const HistoryIDSet& history_id_set(entry.second);
1046 map_entry->set_item_count(history_id_set.size()); 991 map_entry->set_item_count(history_id_set.size());
1047 for (HistoryIDSet::const_iterator set_iter = history_id_set.begin(); 992 for (HistoryID history_id : history_id_set)
1048 set_iter != history_id_set.end(); ++set_iter) 993 map_entry->add_history_id(history_id);
1049 map_entry->add_history_id(*set_iter);
1050 } 994 }
1051 } 995 }
1052 996
1053 void URLIndexPrivateData::SaveHistoryInfoMap( 997 void URLIndexPrivateData::SaveHistoryInfoMap(
1054 InMemoryURLIndexCacheItem* cache) const { 998 InMemoryURLIndexCacheItem* cache) const {
1055 if (history_info_map_.empty()) 999 if (history_info_map_.empty())
1056 return; 1000 return;
1057 HistoryInfoMapItem* map_item = cache->mutable_history_info_map(); 1001 HistoryInfoMapItem* map_item = cache->mutable_history_info_map();
1058 map_item->set_item_count(history_info_map_.size()); 1002 map_item->set_item_count(history_info_map_.size());
1059 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin(); 1003 for (const auto& entry : history_info_map_) {
1060 iter != history_info_map_.end(); ++iter) {
1061 HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry(); 1004 HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry();
1062 map_entry->set_history_id(iter->first); 1005 map_entry->set_history_id(entry.first);
1063 const history::URLRow& url_row(iter->second.url_row); 1006 const history::URLRow& url_row(entry.second.url_row);
1064 // Note: We only save information that contributes to the index so there 1007 // Note: We only save information that contributes to the index so there
1065 // is no need to save search_term_cache_ (not persistent). 1008 // is no need to save search_term_cache_ (not persistent).
1066 map_entry->set_visit_count(url_row.visit_count()); 1009 map_entry->set_visit_count(url_row.visit_count());
1067 map_entry->set_typed_count(url_row.typed_count()); 1010 map_entry->set_typed_count(url_row.typed_count());
1068 map_entry->set_last_visit(url_row.last_visit().ToInternalValue()); 1011 map_entry->set_last_visit(url_row.last_visit().ToInternalValue());
1069 map_entry->set_url(url_row.url().spec()); 1012 map_entry->set_url(url_row.url().spec());
1070 map_entry->set_title(base::UTF16ToUTF8(url_row.title())); 1013 map_entry->set_title(base::UTF16ToUTF8(url_row.title()));
1071 const VisitInfoVector& visits(iter->second.visits); 1014 for (const auto& visit : entry.second.visits) {
1072 for (VisitInfoVector::const_iterator visit_iter = visits.begin();
1073 visit_iter != visits.end(); ++visit_iter) {
1074 HistoryInfoMapEntry_VisitInfo* visit_info = map_entry->add_visits(); 1015 HistoryInfoMapEntry_VisitInfo* visit_info = map_entry->add_visits();
1075 visit_info->set_visit_time(visit_iter->first.ToInternalValue()); 1016 visit_info->set_visit_time(visit.first.ToInternalValue());
1076 visit_info->set_transition_type(visit_iter->second); 1017 visit_info->set_transition_type(visit.second);
1077 } 1018 }
1078 } 1019 }
1079 } 1020 }
1080 1021
1081 void URLIndexPrivateData::SaveWordStartsMap( 1022 void URLIndexPrivateData::SaveWordStartsMap(
1082 InMemoryURLIndexCacheItem* cache) const { 1023 InMemoryURLIndexCacheItem* cache) const {
1083 if (word_starts_map_.empty()) 1024 if (word_starts_map_.empty())
1084 return; 1025 return;
1085 // For unit testing: Enable saving of the cache as an earlier version to 1026 // For unit testing: Enable saving of the cache as an earlier version to
1086 // allow testing of cache file upgrading in ReadFromFile(). 1027 // allow testing of cache file upgrading in ReadFromFile().
1087 // TODO(mrossetti): Instead of intruding on production code with this kind of 1028 // TODO(mrossetti): Instead of intruding on production code with this kind of
1088 // test harness, save a copy of an older version cache with known results. 1029 // test harness, save a copy of an older version cache with known results.
1089 // Implement this when switching the caching over to SQLite. 1030 // Implement this when switching the caching over to SQLite.
1090 if (saved_cache_version_ < 1) 1031 if (saved_cache_version_ < 1)
1091 return; 1032 return;
1092 1033
1093 WordStartsMapItem* map_item = cache->mutable_word_starts_map(); 1034 WordStartsMapItem* map_item = cache->mutable_word_starts_map();
1094 map_item->set_item_count(word_starts_map_.size()); 1035 map_item->set_item_count(word_starts_map_.size());
1095 for (WordStartsMap::const_iterator iter = word_starts_map_.begin(); 1036 for (const auto& entry : word_starts_map_) {
1096 iter != word_starts_map_.end(); ++iter) {
1097 WordStartsMapEntry* map_entry = map_item->add_word_starts_map_entry(); 1037 WordStartsMapEntry* map_entry = map_item->add_word_starts_map_entry();
1098 map_entry->set_history_id(iter->first); 1038 map_entry->set_history_id(entry.first);
1099 const RowWordStarts& word_starts(iter->second); 1039 const RowWordStarts& word_starts(entry.second);
1100 for (WordStarts::const_iterator i = word_starts.url_word_starts_.begin(); 1040 for (auto url_word_start : word_starts.url_word_starts_)
1101 i != word_starts.url_word_starts_.end(); ++i) 1041 map_entry->add_url_word_starts(url_word_start);
1102 map_entry->add_url_word_starts(*i); 1042 for (auto title_word_start : word_starts.title_word_starts_)
1103 for (WordStarts::const_iterator i = word_starts.title_word_starts_.begin(); 1043 map_entry->add_title_word_starts(title_word_start);
1104 i != word_starts.title_word_starts_.end(); ++i)
1105 map_entry->add_title_word_starts(*i);
1106 } 1044 }
1107 } 1045 }
1108 1046
1109 bool URLIndexPrivateData::RestorePrivateData( 1047 bool URLIndexPrivateData::RestorePrivateData(
1110 const InMemoryURLIndexCacheItem& cache) { 1048 const InMemoryURLIndexCacheItem& cache) {
1111 last_time_rebuilt_from_history_ = 1049 last_time_rebuilt_from_history_ =
1112 base::Time::FromInternalValue(cache.last_rebuild_timestamp()); 1050 base::Time::FromInternalValue(cache.last_rebuild_timestamp());
1113 const base::TimeDelta rebuilt_ago = 1051 const base::TimeDelta rebuilt_ago =
1114 base::Time::Now() - last_time_rebuilt_from_history_; 1052 base::Time::Now() - last_time_rebuilt_from_history_;
1115 if ((rebuilt_ago > base::TimeDelta::FromDays(7)) || 1053 if ((rebuilt_ago > base::TimeDelta::FromDays(7)) ||
1116 (rebuilt_ago < base::TimeDelta::FromDays(-1))) { 1054 (rebuilt_ago < base::TimeDelta::FromDays(-1))) {
1117 // Cache is more than a week old or, somehow, from some time in the future. 1055 // Cache is more than a week old or, somehow, from some time in the future.
1118 // It's probably a good time to rebuild the index from history to 1056 // It's probably a good time to rebuild the index from history to
1119 // allow synced entries to now appear, expired entries to disappear, etc. 1057 // allow synced entrys to now appear, expired entries to disappear, etc.
dyaroshev 2017/02/17 20:17:23 Fix with next patch!
1120 // Allow one day in the future to make the cache not rebuild on simple 1058 // Allow one day in the future to make the cache not rebuild on simple
1121 // system clock changes such as time zone changes. 1059 // system clock changes such as time zone changes.
1122 return false; 1060 return false;
1123 } 1061 }
1124 if (cache.has_version()) { 1062 if (cache.has_version()) {
1125 if (cache.version() < kCurrentCacheFileVersion) { 1063 if (cache.version() < kCurrentCacheFileVersion) {
1126 // Don't try to restore an old format cache file. (This will cause 1064 // Don't try to restore an old format cache file. (This will cause
1127 // the InMemoryURLIndex to schedule rebuilding the URLIndexPrivateData 1065 // the InMemoryURLIndex to schedule rebuilding the URLIndexPrivateData
1128 // from history.) 1066 // from history.)
1129 return false; 1067 return false;
1130 } 1068 }
1131 restored_cache_version_ = cache.version(); 1069 restored_cache_version_ = cache.version();
1132 } 1070 }
1133 return RestoreWordList(cache) && RestoreWordMap(cache) && 1071 return RestoreWordList(cache) && RestoreWordMap(cache) &&
1134 RestoreCharWordMap(cache) && RestoreWordIDHistoryMap(cache) && 1072 RestoreCharWordMap(cache) && RestoreWordIDHistoryMap(cache) &&
1135 RestoreHistoryInfoMap(cache) && RestoreWordStartsMap(cache); 1073 RestoreHistoryInfoMap(cache) && RestoreWordStartsMap(cache);
1136 } 1074 }
1137 1075
1138 bool URLIndexPrivateData::RestoreWordList( 1076 bool URLIndexPrivateData::RestoreWordList(
1139 const InMemoryURLIndexCacheItem& cache) { 1077 const InMemoryURLIndexCacheItem& cache) {
1140 if (!cache.has_word_list()) 1078 if (!cache.has_word_list())
1141 return false; 1079 return false;
1142 const WordListItem& list_item(cache.word_list()); 1080 const WordListItem& list_item(cache.word_list());
1143 uint32_t expected_item_count = list_item.word_count(); 1081 uint32_t expected_item_count = list_item.word_count();
1144 uint32_t actual_item_count = list_item.word_size(); 1082 uint32_t actual_item_count = list_item.word_size();
1145 if (actual_item_count == 0 || actual_item_count != expected_item_count) 1083 if (actual_item_count == 0 || actual_item_count != expected_item_count)
1146 return false; 1084 return false;
1147 const RepeatedPtrField<std::string>& words(list_item.word()); 1085 const RepeatedPtrField<std::string>& words(list_item.word());
1148 for (RepeatedPtrField<std::string>::const_iterator iter = words.begin(); 1086 word_list_.reserve(words.size());
1149 iter != words.end(); ++iter) 1087 std::transform(
1150 word_list_.push_back(base::UTF8ToUTF16(*iter)); 1088 words.begin(), words.end(), std::back_inserter(word_list_),
1089 [](const std::string& word) { return base::UTF8ToUTF16(word); });
1151 return true; 1090 return true;
1152 } 1091 }
1153 1092
1154 bool URLIndexPrivateData::RestoreWordMap( 1093 bool URLIndexPrivateData::RestoreWordMap(
1155 const InMemoryURLIndexCacheItem& cache) { 1094 const InMemoryURLIndexCacheItem& cache) {
1156 if (!cache.has_word_map()) 1095 if (!cache.has_word_map())
1157 return false; 1096 return false;
1158 const WordMapItem& list_item(cache.word_map()); 1097 const WordMapItem& list_item(cache.word_map());
1159 uint32_t expected_item_count = list_item.item_count(); 1098 uint32_t expected_item_count = list_item.item_count();
1160 uint32_t actual_item_count = list_item.word_map_entry_size(); 1099 uint32_t actual_item_count = list_item.word_map_entry_size();
1161 if (actual_item_count == 0 || actual_item_count != expected_item_count) 1100 if (actual_item_count == 0 || actual_item_count != expected_item_count)
1162 return false; 1101 return false;
1163 const RepeatedPtrField<WordMapEntry>& entries(list_item.word_map_entry()); 1102 for (const auto& entry : list_item.word_map_entry())
1164 for (RepeatedPtrField<WordMapEntry>::const_iterator iter = entries.begin(); 1103 word_map_[base::UTF8ToUTF16(entry.word())] = entry.word_id();
dyaroshev 2017/02/17 20:44:37 parallel_set.
1165 iter != entries.end(); ++iter) 1104
1166 word_map_[base::UTF8ToUTF16(iter->word())] = iter->word_id();
1167 return true; 1105 return true;
1168 } 1106 }
1169 1107
1170 bool URLIndexPrivateData::RestoreCharWordMap( 1108 bool URLIndexPrivateData::RestoreCharWordMap(
1171 const InMemoryURLIndexCacheItem& cache) { 1109 const InMemoryURLIndexCacheItem& cache) {
1172 if (!cache.has_char_word_map()) 1110 if (!cache.has_char_word_map())
1173 return false; 1111 return false;
1174 const CharWordMapItem& list_item(cache.char_word_map()); 1112 const CharWordMapItem& list_item(cache.char_word_map());
1175 uint32_t expected_item_count = list_item.item_count(); 1113 uint32_t expected_item_count = list_item.item_count();
1176 uint32_t actual_item_count = list_item.char_word_map_entry_size(); 1114 uint32_t actual_item_count = list_item.char_word_map_entry_size();
1177 if (actual_item_count == 0 || actual_item_count != expected_item_count) 1115 if (actual_item_count == 0 || actual_item_count != expected_item_count)
1178 return false; 1116 return false;
1179 const RepeatedPtrField<CharWordMapEntry>& 1117
1180 entries(list_item.char_word_map_entry()); 1118 for (const auto& entry : list_item.char_word_map_entry()) {
1181 for (RepeatedPtrField<CharWordMapEntry>::const_iterator iter = 1119 expected_item_count = entry.item_count();
1182 entries.begin(); iter != entries.end(); ++iter) { 1120 actual_item_count = entry.word_id_size();
1183 expected_item_count = iter->item_count();
1184 actual_item_count = iter->word_id_size();
1185 if (actual_item_count == 0 || actual_item_count != expected_item_count) 1121 if (actual_item_count == 0 || actual_item_count != expected_item_count)
1186 return false; 1122 return false;
1187 base::char16 uni_char = static_cast<base::char16>(iter->char_16()); 1123 base::char16 uni_char = static_cast<base::char16>(entry.char_16());
1188 WordIDSet word_id_set; 1124 const RepeatedField<int32_t>& word_ids(entry.word_id());
1189 const RepeatedField<int32_t>& word_ids(iter->word_id()); 1125 char_word_map_[uni_char] = {word_ids.begin(), word_ids.end()};
1190 for (RepeatedField<int32_t>::const_iterator jiter = word_ids.begin();
1191 jiter != word_ids.end(); ++jiter)
1192 word_id_set.insert(*jiter);
1193 char_word_map_[uni_char] = word_id_set;
1194 } 1126 }
1195 return true; 1127 return true;
1196 } 1128 }
1197 1129
1198 bool URLIndexPrivateData::RestoreWordIDHistoryMap( 1130 bool URLIndexPrivateData::RestoreWordIDHistoryMap(
1199 const InMemoryURLIndexCacheItem& cache) { 1131 const InMemoryURLIndexCacheItem& cache) {
1200 if (!cache.has_word_id_history_map()) 1132 if (!cache.has_word_id_history_map())
1201 return false; 1133 return false;
1202 const WordIDHistoryMapItem& list_item(cache.word_id_history_map()); 1134 const WordIDHistoryMapItem& list_item(cache.word_id_history_map());
1203 uint32_t expected_item_count = list_item.item_count(); 1135 uint32_t expected_item_count = list_item.item_count();
1204 uint32_t actual_item_count = list_item.word_id_history_map_entry_size(); 1136 uint32_t actual_item_count = list_item.word_id_history_map_entry_size();
1205 if (actual_item_count == 0 || actual_item_count != expected_item_count) 1137 if (actual_item_count == 0 || actual_item_count != expected_item_count)
1206 return false; 1138 return false;
1207 const RepeatedPtrField<WordIDHistoryMapEntry>& 1139 for (const auto& entry : list_item.word_id_history_map_entry()) {
1208 entries(list_item.word_id_history_map_entry()); 1140 expected_item_count = entry.item_count();
1209 for (RepeatedPtrField<WordIDHistoryMapEntry>::const_iterator iter = 1141 actual_item_count = entry.history_id_size();
1210 entries.begin(); iter != entries.end(); ++iter) {
1211 expected_item_count = iter->item_count();
1212 actual_item_count = iter->history_id_size();
1213 if (actual_item_count == 0 || actual_item_count != expected_item_count) 1142 if (actual_item_count == 0 || actual_item_count != expected_item_count)
1214 return false; 1143 return false;
1215 WordID word_id = iter->word_id(); 1144 WordID word_id = entry.word_id();
1216 HistoryIDSet history_id_set; 1145 const RepeatedField<int64_t>& history_ids(entry.history_id());
1217 const RepeatedField<int64_t>& history_ids(iter->history_id()); 1146 word_id_history_map_[word_id] = {history_ids.begin(), history_ids.end()};
1218 for (RepeatedField<int64_t>::const_iterator jiter = history_ids.begin(); 1147 for (HistoryID history_id : history_ids)
dyaroshev 2017/02/17 20:44:38 parallel_set.
1219 jiter != history_ids.end(); ++jiter) { 1148 history_id_word_map_[history_id].insert(word_id);
1220 history_id_set.insert(*jiter);
1221 AddToHistoryIDWordMap(*jiter, word_id);
1222 }
1223 word_id_history_map_[word_id] = history_id_set;
1224 } 1149 }
1225 return true; 1150 return true;
1226 } 1151 }
1227 1152
1228 bool URLIndexPrivateData::RestoreHistoryInfoMap( 1153 bool URLIndexPrivateData::RestoreHistoryInfoMap(
1229 const InMemoryURLIndexCacheItem& cache) { 1154 const InMemoryURLIndexCacheItem& cache) {
1230 if (!cache.has_history_info_map()) 1155 if (!cache.has_history_info_map())
1231 return false; 1156 return false;
1232 const HistoryInfoMapItem& list_item(cache.history_info_map()); 1157 const HistoryInfoMapItem& list_item(cache.history_info_map());
1233 uint32_t expected_item_count = list_item.item_count(); 1158 uint32_t expected_item_count = list_item.item_count();
1234 uint32_t actual_item_count = list_item.history_info_map_entry_size(); 1159 uint32_t actual_item_count = list_item.history_info_map_entry_size();
1235 if (actual_item_count == 0 || actual_item_count != expected_item_count) 1160 if (actual_item_count == 0 || actual_item_count != expected_item_count)
1236 return false; 1161 return false;
1237 const RepeatedPtrField<HistoryInfoMapEntry>& 1162
1238 entries(list_item.history_info_map_entry()); 1163 for (const auto& entry : list_item.history_info_map_entry()) {
1239 for (RepeatedPtrField<HistoryInfoMapEntry>::const_iterator iter = 1164 HistoryID history_id = entry.history_id();
1240 entries.begin(); iter != entries.end(); ++iter) { 1165 history::URLRow url_row(GURL(entry.url()), history_id);
1241 HistoryID history_id = iter->history_id(); 1166 url_row.set_visit_count(entry.visit_count());
1242 GURL url(iter->url()); 1167 url_row.set_typed_count(entry.typed_count());
1243 history::URLRow url_row(url, history_id); 1168 url_row.set_last_visit(base::Time::FromInternalValue(entry.last_visit()));
1244 url_row.set_visit_count(iter->visit_count()); 1169 if (entry.has_title())
1245 url_row.set_typed_count(iter->typed_count()); 1170 url_row.set_title(base::UTF8ToUTF16(entry.title()));
1246 url_row.set_last_visit(base::Time::FromInternalValue(iter->last_visit())); 1171 history_info_map_[history_id].url_row = std::move(url_row);
1247 if (iter->has_title()) {
1248 base::string16 title(base::UTF8ToUTF16(iter->title()));
1249 url_row.set_title(title);
1250 }
1251 history_info_map_[history_id].url_row = url_row;
1252 1172
1253 // Restore visits list. 1173 // Restore visits list.
1254 VisitInfoVector visits; 1174 VisitInfoVector visits;
1255 visits.reserve(iter->visits_size()); 1175 visits.reserve(entry.visits_size());
1256 for (int i = 0; i < iter->visits_size(); ++i) { 1176 for (const auto& entry_visit : entry.visits()) {
1257 visits.push_back(std::make_pair( 1177 visits.emplace_back(
1258 base::Time::FromInternalValue(iter->visits(i).visit_time()), 1178 base::Time::FromInternalValue(entry_visit.visit_time()),
1259 ui::PageTransitionFromInt(iter->visits(i).transition_type()))); 1179 ui::PageTransitionFromInt(entry_visit.transition_type()));
1260 } 1180 }
1261 history_info_map_[history_id].visits = visits; 1181 history_info_map_[history_id].visits = std::move(visits);
1262 } 1182 }
1263 return true; 1183 return true;
1264 } 1184 }
1265 1185
1266 bool URLIndexPrivateData::RestoreWordStartsMap( 1186 bool URLIndexPrivateData::RestoreWordStartsMap(
1267 const InMemoryURLIndexCacheItem& cache) { 1187 const InMemoryURLIndexCacheItem& cache) {
1268 // Note that this function must be called after RestoreHistoryInfoMap() has 1188 // Note that this function must be called after RestoreHistoryInfoMap() has
1269 // been run as the word starts may have to be recalculated from the urls and 1189 // been run as the word starts may have to be recalculated from the urls and
1270 // page titles. 1190 // page titles.
1271 if (cache.has_word_starts_map()) { 1191 if (cache.has_word_starts_map()) {
1272 const WordStartsMapItem& list_item(cache.word_starts_map()); 1192 const WordStartsMapItem& list_item(cache.word_starts_map());
1273 uint32_t expected_item_count = list_item.item_count(); 1193 uint32_t expected_item_count = list_item.item_count();
1274 uint32_t actual_item_count = list_item.word_starts_map_entry_size(); 1194 uint32_t actual_item_count = list_item.word_starts_map_entry_size();
1275 if (actual_item_count == 0 || actual_item_count != expected_item_count) 1195 if (actual_item_count == 0 || actual_item_count != expected_item_count)
1276 return false; 1196 return false;
1277 const RepeatedPtrField<WordStartsMapEntry>& 1197 for (const auto& entry : list_item.word_starts_map_entry()) {
1278 entries(list_item.word_starts_map_entry()); 1198 HistoryID history_id = entry.history_id();
1279 for (RepeatedPtrField<WordStartsMapEntry>::const_iterator iter =
1280 entries.begin(); iter != entries.end(); ++iter) {
1281 HistoryID history_id = iter->history_id();
1282 RowWordStarts word_starts; 1199 RowWordStarts word_starts;
1283 // Restore the URL word starts. 1200 // Restore the URL word starts.
1284 const RepeatedField<int32_t>& url_starts(iter->url_word_starts()); 1201 const RepeatedField<int32_t>& url_starts(entry.url_word_starts());
1285 for (RepeatedField<int32_t>::const_iterator jiter = url_starts.begin(); 1202 word_starts.url_word_starts_ = {url_starts.begin(), url_starts.end()};
1286 jiter != url_starts.end(); ++jiter) 1203
1287 word_starts.url_word_starts_.push_back(*jiter);
1288 // Restore the page title word starts. 1204 // Restore the page title word starts.
1289 const RepeatedField<int32_t>& title_starts(iter->title_word_starts()); 1205 const RepeatedField<int32_t>& title_starts(entry.title_word_starts());
1290 for (RepeatedField<int32_t>::const_iterator jiter = title_starts.begin(); 1206 word_starts.title_word_starts_ = {title_starts.begin(),
1291 jiter != title_starts.end(); ++jiter) 1207 title_starts.end()};
1292 word_starts.title_word_starts_.push_back(*jiter); 1208
1293 word_starts_map_[history_id] = word_starts; 1209 word_starts_map_[history_id] = std::move(word_starts);
1294 } 1210 }
1295 } else { 1211 } else {
1296 // Since the cache did not contain any word starts we must rebuild them from 1212 // Since the cache did not contain any word starts we must rebuild them from
1297 // the URL and page titles. 1213 // the URL and page titles.
1298 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin(); 1214 for (const auto& entry : history_info_map_) {
dyaroshev 2017/02/17 20:44:38 parallel_set.
1299 iter != history_info_map_.end(); ++iter) {
1300 RowWordStarts word_starts; 1215 RowWordStarts word_starts;
1301 const history::URLRow& row(iter->second.url_row); 1216 const history::URLRow& row(entry.second.url_row);
1302 const base::string16& url = 1217 const base::string16& url =
1303 bookmarks::CleanUpUrlForMatching(row.url(), nullptr); 1218 bookmarks::CleanUpUrlForMatching(row.url(), nullptr);
1304 String16VectorFromString16(url, false, &word_starts.url_word_starts_); 1219 String16VectorFromString16(url, false, &word_starts.url_word_starts_);
1305 const base::string16& title = 1220 const base::string16& title =
1306 bookmarks::CleanUpTitleForMatching(row.title()); 1221 bookmarks::CleanUpTitleForMatching(row.title());
1307 String16VectorFromString16(title, false, &word_starts.title_word_starts_); 1222 String16VectorFromString16(title, false, &word_starts.title_word_starts_);
1308 word_starts_map_[iter->first] = word_starts; 1223 word_starts_map_[entry.first] = std::move(word_starts);
1309 } 1224 }
1310 } 1225 }
1311 return true; 1226 return true;
1312 } 1227 }
1313 1228
1314 // static 1229 // static
1315 bool URLIndexPrivateData::URLSchemeIsWhitelisted( 1230 bool URLIndexPrivateData::URLSchemeIsWhitelisted(
1316 const GURL& gurl, 1231 const GURL& gurl,
1317 const std::set<std::string>& whitelist) { 1232 const std::set<std::string>& whitelist) {
1318 return whitelist.find(gurl.scheme()) != whitelist.end(); 1233 return whitelist.find(gurl.scheme()) != whitelist.end();
(...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after
1381 // First cut: typed count, visit count, recency. 1296 // First cut: typed count, visit count, recency.
1382 // TODO(mrossetti): This is too simplistic. Consider an approach which ranks 1297 // TODO(mrossetti): This is too simplistic. Consider an approach which ranks
1383 // recently visited (within the last 12/24 hours) as highly important. Get 1298 // recently visited (within the last 12/24 hours) as highly important. Get
1384 // input from mpearson. 1299 // input from mpearson.
1385 if (r1.typed_count() != r2.typed_count()) 1300 if (r1.typed_count() != r2.typed_count())
1386 return (r1.typed_count() > r2.typed_count()); 1301 return (r1.typed_count() > r2.typed_count());
1387 if (r1.visit_count() != r2.visit_count()) 1302 if (r1.visit_count() != r2.visit_count())
1388 return (r1.visit_count() > r2.visit_count()); 1303 return (r1.visit_count() > r2.visit_count());
1389 return (r1.last_visit() > r2.last_visit()); 1304 return (r1.last_visit() > r2.last_visit());
1390 } 1305 }
OLDNEW
« no previous file with comments | « components/omnibox/browser/url_index_private_data.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698