Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(184)

Side by Side Diff: components/omnibox/browser/url_index_private_data.cc

Issue 2690303012: Cleaning up url_index_private_data and in_memory_url_index_types. (Closed)
Patch Set: Review, round 3. Created 3 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/omnibox/browser/url_index_private_data.h" 5 #include "components/omnibox/browser/url_index_private_data.h"
6 6
7 #include <stdint.h> 7 #include <stdint.h>
8 8
9 #include <functional> 9 #include <functional>
10 #include <iterator> 10 #include <iterator>
(...skipping 19 matching lines...) Expand all
30 #include "components/omnibox/browser/in_memory_url_index.h" 30 #include "components/omnibox/browser/in_memory_url_index.h"
31 #include "components/search_engines/template_url_service.h" 31 #include "components/search_engines/template_url_service.h"
32 #include "components/url_formatter/url_formatter.h" 32 #include "components/url_formatter/url_formatter.h"
33 33
34 #if defined(USE_SYSTEM_PROTOBUF) 34 #if defined(USE_SYSTEM_PROTOBUF)
35 #include <google/protobuf/repeated_field.h> 35 #include <google/protobuf/repeated_field.h>
36 #else 36 #else
37 #include "third_party/protobuf/src/google/protobuf/repeated_field.h" 37 #include "third_party/protobuf/src/google/protobuf/repeated_field.h"
38 #endif 38 #endif
39 39
40 namespace {
41
40 using google::protobuf::RepeatedField; 42 using google::protobuf::RepeatedField;
41 using google::protobuf::RepeatedPtrField; 43 using google::protobuf::RepeatedPtrField;
42 using in_memory_url_index::InMemoryURLIndexCacheItem; 44 using in_memory_url_index::InMemoryURLIndexCacheItem;
43 45
44 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordListItem 46 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordListItem
45 WordListItem; 47 WordListItem;
46 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordMapItem_WordMapEntry 48 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordMapItem_WordMapEntry
47 WordMapEntry; 49 WordMapEntry;
48 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordMapItem WordMapItem; 50 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordMapItem WordMapItem;
49 typedef in_memory_url_index::InMemoryURLIndexCacheItem_CharWordMapItem 51 typedef in_memory_url_index::InMemoryURLIndexCacheItem_CharWordMapItem
(...skipping 12 matching lines...) Expand all
62 HistoryInfoMapEntry; 64 HistoryInfoMapEntry;
63 typedef in_memory_url_index:: 65 typedef in_memory_url_index::
64 InMemoryURLIndexCacheItem_HistoryInfoMapItem_HistoryInfoMapEntry_VisitInfo 66 InMemoryURLIndexCacheItem_HistoryInfoMapItem_HistoryInfoMapEntry_VisitInfo
65 HistoryInfoMapEntry_VisitInfo; 67 HistoryInfoMapEntry_VisitInfo;
66 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordStartsMapItem 68 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordStartsMapItem
67 WordStartsMapItem; 69 WordStartsMapItem;
68 typedef in_memory_url_index:: 70 typedef in_memory_url_index::
69 InMemoryURLIndexCacheItem_WordStartsMapItem_WordStartsMapEntry 71 InMemoryURLIndexCacheItem_WordStartsMapItem_WordStartsMapEntry
70 WordStartsMapEntry; 72 WordStartsMapEntry;
71 73
74 // ContainerCast ---------------------------------------------------------------
75 // A helper to Intersect algorithm that takes advantage of copy/move constructor
76 // if the original and resulting sets type are the same.
77
78 // ContainerTo has a constructor from ContainerFrom.
79 template <typename ContainerTo, typename ContainerFrom>
80 ContainerTo ContainerCastImpl(ContainerFrom&& cont, std::true_type) {
81 return ContainerTo(std::forward<ContainerFrom>(cont));
82 }
83
84 // ContainerTo does not have a constructor from ContainerFrom.
85 template <typename ContainerTo, typename ContainerFrom>
86 ContainerTo ContainerCastImpl(const ContainerFrom& cont, std::false_type) {
87 return ContainerTo(cont.begin(), cont.end());
88 }
89
90 // Type traits to disable perfect forwarding.
91 template <typename T>
92 using rvalue_t = typename std::add_rvalue_reference<
93 typename std::remove_reference<T>::type>::type;
94
95 // ContainerTo does not have a constructor from ContainerFrom.
96 // Note: we have to use remove_reference to disable perfect forwarding.
97 template <typename ContainerTo, typename ContainerFrom>
98 ContainerTo ContainerCastImpl(rvalue_t<ContainerFrom> cont, std::false_type) {
99 return ContainerTo(std::make_move_iterator(cont.begin()),
100 std::make_move_iterator(cont.end()));
101 }
102
103 template <typename ContainerTo, typename ContainerFrom>
104 ContainerTo ContinerCast(ContainerFrom&& cont) {
Alexander Yashkin 2017/02/21 07:07:15 ContainerCast?
dyaroshev 2017/02/26 01:12:17 Not applicable.
105 return ContainerCastImpl<ContainerTo>(
106 std::forward<ContainerFrom>(cont),
107 std::is_convertible<ContainerTo, ContainerFrom>{});
108 }
109
72 // Algorithm Functions --------------------------------------------------------- 110 // Algorithm Functions ---------------------------------------------------------
73 111
74 // Comparison function for sorting search terms by descending length. 112 // Comparison function for sorting search terms by descending length.
75 bool LengthGreater(const base::string16& string_a, 113 bool LengthGreater(const base::string16& string_a,
76 const base::string16& string_b) { 114 const base::string16& string_b) {
77 return string_a.length() > string_b.length(); 115 return string_a.length() > string_b.length();
78 } 116 }
79 117
118 // Creates a new set by intersecting all the sets in range defined by |fist|,
Alexander Yashkin 2017/02/21 07:07:15 Can you make this comment more human readable? Wha
dyaroshev 2017/02/22 22:57:25 Another good argument for not inlining this -> thi
dyaroshev 2017/02/23 00:45:54 Significantly revised version: https://godbolt.org
119 // |last|. |get_set| is a customization point for getting the set from the
120 // element of the range.
121 template <typename ResultingSet, typename Iter, typename Projection>
122 ResultingSet IntersectSetsWithProjection(Iter first,
123 Iter last,
124 Projection get_set) {
125 if (first == last)
126 return ResultingSet();
127
128 auto res = ContinerCast<ResultingSet>(get_set(*first));
Peter Kasting 2017/02/25 04:54:42 It seems like the main reason you need ContainerCa
dyaroshev 2017/02/26 01:12:16 Since this is the cleaning up patch and I don't se
Peter Kasting 2017/02/28 00:33:54 It's fine.
129
130 // Like std::accumulate but short circuits if set is empty.
131 for (++first; first != last && !res.empty(); ++first)
132 res = base::STLSetIntersection<ResultingSet>(res, get_set(*first));
133
134 return res;
135 }
136
137 } // namespace
80 138
81 // UpdateRecentVisitsFromHistoryDBTask ----------------------------------------- 139 // UpdateRecentVisitsFromHistoryDBTask -----------------------------------------
82 140
83 // HistoryDBTask used to update the recent visit data for a particular 141 // HistoryDBTask used to update the recent visit data for a particular
84 // row from the history database. 142 // row from the history database.
85 class UpdateRecentVisitsFromHistoryDBTask : public history::HistoryDBTask { 143 class UpdateRecentVisitsFromHistoryDBTask : public history::HistoryDBTask {
86 public: 144 public:
87 explicit UpdateRecentVisitsFromHistoryDBTask( 145 explicit UpdateRecentVisitsFromHistoryDBTask(
88 URLIndexPrivateData* private_data, 146 URLIndexPrivateData* private_data,
89 history::URLID url_id); 147 history::URLID url_id);
(...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after
187 // the index we need individual, lower-cased words, ignoring escapings. For 245 // the index we need individual, lower-cased words, ignoring escapings. For
188 // the final filtering we need whitespace separated substrings possibly 246 // the final filtering we need whitespace separated substrings possibly
189 // containing escaped characters. 247 // containing escaped characters.
190 base::string16 lower_raw_string(base::i18n::ToLower(search_string)); 248 base::string16 lower_raw_string(base::i18n::ToLower(search_string));
191 base::string16 lower_unescaped_string = net::UnescapeURLComponent( 249 base::string16 lower_unescaped_string = net::UnescapeURLComponent(
192 lower_raw_string, 250 lower_raw_string,
193 net::UnescapeRule::SPACES | net::UnescapeRule::PATH_SEPARATORS | 251 net::UnescapeRule::SPACES | net::UnescapeRule::PATH_SEPARATORS |
194 net::UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS); 252 net::UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS);
195 253
196 // Extract individual 'words' (as opposed to 'terms'; see comment in 254 // Extract individual 'words' (as opposed to 'terms'; see comment in
197 // HistoryIdSetToScoredMatches()) from the search string. When the user 255 // HistoryIdsToScoredMatches()) from the search string. When the user
198 // types "colspec=ID%20Mstone Release" we get four 'words': "colspec", "id", 256 // types "colspec=ID%20Mstone Release" we get four 'words': "colspec", "id",
199 // "mstone" and "release". 257 // "mstone" and "release".
200 String16Vector lower_words( 258 String16Vector lower_words(
201 String16VectorFromString16(lower_unescaped_string, false, nullptr)); 259 String16VectorFromString16(lower_unescaped_string, false, nullptr));
202 if (lower_words.empty()) 260 if (lower_words.empty())
203 continue; 261 continue;
204 HistoryIDSet history_id_set = HistoryIDSetFromWords(lower_words); 262
205 pre_filter_item_count_ += history_id_set.size(); 263 HistoryIDVector history_ids = HistoryIDsFromWords(lower_words);
206 // Trim the candidate pool if it is large. Note that we do not filter out 264
207 // items that do not contain the search terms as proper substrings -- 265 pre_filter_item_count_ += history_ids.size();
208 // doing so is the performance-costly operation we are trying to avoid in 266 TrimHistoryIdsPool(&history_ids);
209 // order to maintain omnibox responsiveness. 267 post_filter_item_count_ += history_ids.size();
210 const size_t kItemsToScoreLimit = 500; 268
211 if (history_id_set.size() > kItemsToScoreLimit) {
212 HistoryIDVector history_ids;
213 std::copy(history_id_set.begin(), history_id_set.end(),
214 std::back_inserter(history_ids));
215 // Trim down the set by sorting by typed-count, visit-count, and last
216 // visit.
217 HistoryItemFactorGreater item_factor_functor(history_info_map_);
218 std::partial_sort(history_ids.begin(),
219 history_ids.begin() + kItemsToScoreLimit,
220 history_ids.end(), item_factor_functor);
221 history_id_set.clear();
222 std::copy(history_ids.begin(), history_ids.begin() + kItemsToScoreLimit,
223 std::inserter(history_id_set, history_id_set.end()));
224 post_filter_item_count_ += history_id_set.size();
225 } else {
226 post_filter_item_count_ += pre_filter_item_count_;
227 }
228 ScoredHistoryMatches temp_scored_items; 269 ScoredHistoryMatches temp_scored_items;
229 HistoryIdSetToScoredMatches(history_id_set, lower_raw_string, 270 HistoryIdsToScoredMatches(std::move(history_ids), lower_raw_string,
230 template_url_service, bookmark_model, 271 template_url_service, bookmark_model,
231 &temp_scored_items); 272 &temp_scored_items);
232 scored_items.insert(scored_items.end(), temp_scored_items.begin(), 273 scored_items.insert(scored_items.end(), temp_scored_items.begin(),
233 temp_scored_items.end()); 274 temp_scored_items.end());
234 } 275 }
235 // Select and sort only the top |max_matches| results. 276 // Select and sort only the top |max_matches| results.
236 if (scored_items.size() > max_matches) { 277 if (scored_items.size() > max_matches) {
237 std::partial_sort(scored_items.begin(), scored_items.begin() + max_matches, 278 std::partial_sort(scored_items.begin(), scored_items.begin() + max_matches,
238 scored_items.end(), 279 scored_items.end(),
239 ScoredHistoryMatch::MatchScoreGreater); 280 ScoredHistoryMatch::MatchScoreGreater);
240 scored_items.resize(max_matches); 281 scored_items.resize(max_matches);
241 } else { 282 } else {
(...skipping 235 matching lines...) Expand 10 before | Expand all | Expand 10 after
477 // post_scoring_item_count_ 518 // post_scoring_item_count_
478 } 519 }
479 520
480 bool URLIndexPrivateData::Empty() const { 521 bool URLIndexPrivateData::Empty() const {
481 return history_info_map_.empty(); 522 return history_info_map_.empty();
482 } 523 }
483 524
484 void URLIndexPrivateData::Clear() { 525 void URLIndexPrivateData::Clear() {
485 last_time_rebuilt_from_history_ = base::Time(); 526 last_time_rebuilt_from_history_ = base::Time();
486 word_list_.clear(); 527 word_list_.clear();
487 available_words_.clear(); 528 available_words_ = {};
488 word_map_.clear(); 529 word_map_.clear();
489 char_word_map_.clear(); 530 char_word_map_.clear();
490 word_id_history_map_.clear(); 531 word_id_history_map_.clear();
491 history_id_word_map_.clear(); 532 history_id_word_map_.clear();
492 history_info_map_.clear(); 533 history_info_map_.clear();
493 word_starts_map_.clear(); 534 word_starts_map_.clear();
494 } 535 }
495 536
496 URLIndexPrivateData::~URLIndexPrivateData() {} 537 URLIndexPrivateData::~URLIndexPrivateData() {}
497 538
498 HistoryIDSet URLIndexPrivateData::HistoryIDSetFromWords( 539 HistoryIDVector URLIndexPrivateData::HistoryIDsFromWords(
499 const String16Vector& unsorted_words) { 540 const String16Vector& unsorted_words) {
541 // The name of the histogram wasn't changed to HistoryIdsFromWords to preserve
542 // old data valid.
Peter Kasting 2017/02/25 04:54:41 Nit: Sounds like we're leaving the old name. If s
dyaroshev 2017/02/26 01:12:16 Done.
500 SCOPED_UMA_HISTOGRAM_TIMER("Omnibox.HistoryQuickHistoryIDSetFromWords"); 543 SCOPED_UMA_HISTOGRAM_TIMER("Omnibox.HistoryQuickHistoryIDSetFromWords");
501 // Break the terms down into individual terms (words), get the candidate 544 // Break the terms down into individual terms (words), get the candidate
502 // set for each term, and intersect each to get a final candidate list. 545 // set for each term, and intersect each to get a final candidate list.
503 // Note that a single 'term' from the user's perspective might be 546 // Note that a single 'term' from the user's perspective might be
504 // a string like "http://www.somewebsite.com" which, from our perspective, 547 // a string like "http://www.somewebsite.com" which, from our perspective,
505 // is four words: 'http', 'www', 'somewebsite', and 'com'. 548 // is four words: 'http', 'www', 'somewebsite', and 'com'.
506 HistoryIDSet history_id_set;
507 String16Vector words(unsorted_words); 549 String16Vector words(unsorted_words);
508 // Sort the words into the longest first as such are likely to narrow down 550 // Sort the words into the longest first as such are likely to narrow down
509 // the results quicker. Also, single character words are the most expensive 551 // the results quicker. Also, single character words are the most expensive
510 // to process so save them for last. 552 // to process so save them for last.
511 std::sort(words.begin(), words.end(), LengthGreater); 553 std::sort(words.begin(), words.end(), LengthGreater);
512 for (String16Vector::iterator iter = words.begin(); iter != words.end(); 554
513 ++iter) { 555 return IntersectSetsWithProjection<HistoryIDVector>(
514 base::string16 uni_word = *iter; 556 words.begin(), words.end(),
515 HistoryIDSet term_history_set = HistoryIDsForTerm(uni_word); 557 [this](const base::string16& word) { return HistoryIDsForTerm(word); });
516 if (term_history_set.empty()) { 558 }
517 history_id_set.clear(); 559
518 break; 560 void URLIndexPrivateData::TrimHistoryIdsPool(
519 } 561 HistoryIDVector* history_ids) const {
520 if (iter == words.begin()) { 562 constexpr size_t kItemsToScoreLimit = 500;
521 history_id_set.swap(term_history_set); 563 if (history_ids->size() < kItemsToScoreLimit)
522 } else { 564 return;
523 HistoryIDSet new_history_id_set = base::STLSetIntersection<HistoryIDSet>( 565
524 history_id_set, term_history_set); 566 // Trim down the set by sorting by typed-count, visit-count, and last
525 history_id_set.swap(new_history_id_set); 567 // visit.
526 } 568 auto new_end = history_ids->begin() + kItemsToScoreLimit;
527 } 569 HistoryItemFactorGreater item_factor_functor(history_info_map_);
528 return history_id_set; 570
571 std::nth_element(history_ids->begin(), new_end, history_ids->end(),
572 item_factor_functor);
573 history_ids->erase(new_end, history_ids->end());
529 } 574 }
530 575
531 HistoryIDSet URLIndexPrivateData::HistoryIDsForTerm( 576 HistoryIDSet URLIndexPrivateData::HistoryIDsForTerm(
532 const base::string16& term) { 577 const base::string16& term) {
533 if (term.empty()) 578 if (term.empty())
534 return HistoryIDSet(); 579 return HistoryIDSet();
535 580
536 // TODO(mrossetti): Consider optimizing for very common terms such as 581 // TODO(mrossetti): Consider optimizing for very common terms such as
537 // 'http[s]', 'www', 'com', etc. Or collect the top 100 more frequently 582 // 'http[s]', 'www', 'com', etc. Or collect the top 100 more frequently
538 // occuring words in the user's searches. 583 // occuring words in the user's searches.
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after
585 630
586 // Reduce the word set with any leftover, unprocessed characters. 631 // Reduce the word set with any leftover, unprocessed characters.
587 if (!unique_chars.empty()) { 632 if (!unique_chars.empty()) {
588 WordIDSet leftover_set(WordIDSetForTermChars(unique_chars)); 633 WordIDSet leftover_set(WordIDSetForTermChars(unique_chars));
589 // We might come up empty on the leftovers. 634 // We might come up empty on the leftovers.
590 if (leftover_set.empty()) { 635 if (leftover_set.empty()) {
591 search_term_cache_[term] = SearchTermCacheItem(); 636 search_term_cache_[term] = SearchTermCacheItem();
592 return HistoryIDSet(); 637 return HistoryIDSet();
593 } 638 }
594 // Or there may not have been a prefix from which to start. 639 // Or there may not have been a prefix from which to start.
595 if (prefix_chars.empty()) { 640 word_id_set = prefix_chars.empty() ? std::move(leftover_set)
596 word_id_set.swap(leftover_set); 641 : base::STLSetIntersection<WordIDSet>(
597 } else { 642 word_id_set, leftover_set);
598 WordIDSet new_word_id_set = base::STLSetIntersection<WordIDSet>(
599 word_id_set, leftover_set);
600 word_id_set.swap(new_word_id_set);
601 }
602 } 643 }
603 644
604 // We must filter the word list because the resulting word set surely 645 // We must filter the word list because the resulting word set surely
605 // contains words which do not have the search term as a proper subset. 646 // contains words which do not have the search term as a proper subset.
606 for (WordIDSet::iterator word_set_iter = word_id_set.begin(); 647 for (WordIDSet::iterator word_set_iter = word_id_set.begin();
607 word_set_iter != word_id_set.end(); ) { 648 word_set_iter != word_id_set.end(); ) {
608 if (word_list_[*word_set_iter].find(term) == base::string16::npos) 649 if (word_list_[*word_set_iter].find(term) == base::string16::npos)
609 word_set_iter = word_id_set.erase(word_set_iter); 650 word_set_iter = word_id_set.erase(word_set_iter);
610 else 651 else
611 ++word_set_iter; 652 ++word_set_iter;
612 } 653 }
613 } else { 654 } else {
614 word_id_set = WordIDSetForTermChars(Char16SetFromString16(term)); 655 word_id_set = WordIDSetForTermChars(Char16SetFromString16(term));
615 } 656 }
616 657
617 // If any words resulted then we can compose a set of history IDs by unioning 658 // If any words resulted then we can compose a set of history IDs by unioning
618 // the sets from each word. 659 // the sets from each word.
619 HistoryIDSet history_id_set; 660 HistoryIDSet history_id_set;
620 if (!word_id_set.empty()) { 661 for (WordID word_id : word_id_set) {
621 for (WordIDSet::iterator word_id_iter = word_id_set.begin(); 662 WordIDHistoryMap::iterator word_iter = word_id_history_map_.find(word_id);
622 word_id_iter != word_id_set.end(); ++word_id_iter) { 663 if (word_iter != word_id_history_map_.end()) {
623 WordID word_id = *word_id_iter; 664 HistoryIDSet& word_history_id_set(word_iter->second);
624 WordIDHistoryMap::iterator word_iter = word_id_history_map_.find(word_id); 665 history_id_set.insert(word_history_id_set.begin(),
625 if (word_iter != word_id_history_map_.end()) { 666 word_history_id_set.end());
626 HistoryIDSet& word_history_id_set(word_iter->second);
627 history_id_set.insert(word_history_id_set.begin(),
628 word_history_id_set.end());
629 }
630 } 667 }
631 } 668 }
632 669
633 // Record a new cache entry for this word if the term is longer than 670 // Record a new cache entry for this word if the term is longer than
634 // a single character. 671 // a single character.
635 if (term_length > 1) 672 if (term_length > 1)
636 search_term_cache_[term] = SearchTermCacheItem(word_id_set, history_id_set); 673 search_term_cache_[term] = SearchTermCacheItem(word_id_set, history_id_set);
637 674
638 return history_id_set; 675 return history_id_set;
639 } 676 }
640 677
641 WordIDSet URLIndexPrivateData::WordIDSetForTermChars( 678 WordIDSet URLIndexPrivateData::WordIDSetForTermChars(
642 const Char16Set& term_chars) { 679 const Char16Set& term_chars) {
643 WordIDSet word_id_set; 680 WordIDSet empty_set;
644 for (Char16Set::const_iterator c_iter = term_chars.begin(); 681 auto GetSet = [&](base::char16 c) -> const WordIDSet& {
Peter Kasting 2017/02/25 04:54:42 Nit: I would explicitly capture [this, &empty_set]
dyaroshev 2017/02/26 01:12:17 Not applicable. Returning by value would add an e
Peter Kasting 2017/02/26 01:35:18 True. But is it expensive enough to matter? One
dyaroshev 2017/02/26 01:39:16 Well, for std::set I've seen cases where it does.
645 c_iter != term_chars.end(); ++c_iter) { 682 auto char_iter = char_word_map_.find(c);
646 CharWordIDMap::iterator char_iter = char_word_map_.find(*c_iter); 683 if (char_iter == char_word_map_.end())
647 if (char_iter == char_word_map_.end()) { 684 return empty_set;
648 // A character was not found so there are no matching results: bail. 685 return char_iter->second;
Peter Kasting 2017/02/25 04:54:41 Nit: Simpler: return char_iter == char_word_m
dyaroshev 2017/02/26 01:12:16 Not applicable.
649 word_id_set.clear(); 686 };
650 break; 687 return IntersectSetsWithProjection<WordIDSet>(term_chars.begin(),
651 } 688 term_chars.end(), GetSet);
652 WordIDSet& char_word_id_set(char_iter->second);
653 // It is possible for there to no longer be any words associated with
654 // a particular character. Give up in that case.
655 if (char_word_id_set.empty()) {
656 word_id_set.clear();
657 break;
658 }
659
660 if (c_iter == term_chars.begin()) {
661 // First character results becomes base set of results.
662 word_id_set = char_word_id_set;
663 } else {
664 // Subsequent character results get intersected in.
665 WordIDSet new_word_id_set = base::STLSetIntersection<WordIDSet>(
666 word_id_set, char_word_id_set);
667 word_id_set.swap(new_word_id_set);
668 }
669 }
670 return word_id_set;
671 } 689 }
672 690
673 void URLIndexPrivateData::HistoryIdSetToScoredMatches( 691 void URLIndexPrivateData::HistoryIdsToScoredMatches(
674 HistoryIDSet history_id_set, 692 HistoryIDVector history_ids,
675 const base::string16& lower_raw_string, 693 const base::string16& lower_raw_string,
676 const TemplateURLService* template_url_service, 694 const TemplateURLService* template_url_service,
677 bookmarks::BookmarkModel* bookmark_model, 695 bookmarks::BookmarkModel* bookmark_model,
678 ScoredHistoryMatches* scored_items) const { 696 ScoredHistoryMatches* scored_items) const {
679 if (history_id_set.empty()) 697 if (history_ids.empty())
680 return; 698 return;
681 699
682 // Break up the raw search string (complete with escaped URL elements) into 700 // Break up the raw search string (complete with escaped URL elements) into
683 // 'terms' (as opposed to 'words'; see comment in HistoryItemsForTerms()). 701 // 'terms' (as opposed to 'words'; see comment in HistoryItemsForTerms()).
684 // We only want to break up the search string on 'true' whitespace rather than 702 // We only want to break up the search string on 'true' whitespace rather than
685 // escaped whitespace. For example, when the user types 703 // escaped whitespace. For example, when the user types
686 // "colspec=ID%20Mstone Release" we get two 'terms': "colspec=id%20mstone" and 704 // "colspec=ID%20Mstone Release" we get two 'terms': "colspec=id%20mstone" and
687 // "release". 705 // "release".
688 String16Vector lower_raw_terms = 706 String16Vector lower_raw_terms =
689 base::SplitString(lower_raw_string, base::kWhitespaceUTF16, 707 base::SplitString(lower_raw_string, base::kWhitespaceUTF16,
690 base::KEEP_WHITESPACE, base::SPLIT_WANT_NONEMPTY); 708 base::KEEP_WHITESPACE, base::SPLIT_WANT_NONEMPTY);
691 709
692 // Don't score matches when there are no terms to score against. (It's 710 // Don't score matches when there are no terms to score against. (It's
693 // possible that the word break iterater that extracts words to search for in 711 // possible that the word break iterater that extracts words to search for in
694 // the database allows some whitespace "words" whereas SplitString excludes a 712 // the database allows some whitespace "words" whereas SplitString excludes a
695 // long list of whitespace.) One could write a scoring function that gives a 713 // long list of whitespace.) One could write a scoring function that gives a
696 // reasonable order to matches when there are no terms (i.e., all the words 714 // reasonable order to matches when there are no terms (i.e., all the words
697 // are some form of whitespace), but this is such a rare edge case that it's 715 // are some form of whitespace), but this is such a rare edge case that it's
698 // not worth the time. 716 // not worth the time.
699 if (lower_raw_terms.empty()) 717 if (lower_raw_terms.empty())
700 return; 718 return;
701 719
702 WordStarts lower_terms_to_word_starts_offsets; 720 WordStarts lower_terms_to_word_starts_offsets;
703 CalculateWordStartsOffsets(lower_raw_terms, 721 CalculateWordStartsOffsets(lower_raw_terms,
704 &lower_terms_to_word_starts_offsets); 722 &lower_terms_to_word_starts_offsets);
705 723
706 // Filter bad matches and other matches we don't want to display. 724 // Filter bad matches and other matches we don't want to display.
707 for (auto it = history_id_set.begin();;) { 725 {
Peter Kasting 2017/02/25 04:54:42 Nit: {} not necessary
dyaroshev 2017/02/26 01:12:16 Done.
708 it = std::find_if(it, history_id_set.end(), 726 auto filter = [this, template_url_service](const HistoryID history_id) {
709 [this, template_url_service](const HistoryID history_id) { 727 return ShouldFilter(history_id, template_url_service);
710 return ShouldFilter(history_id, template_url_service); 728 };
711 }); 729 history_ids.erase(
712 if (it == history_id_set.end()) 730 std::remove_if(history_ids.begin(), history_ids.end(), filter),
713 break; 731 history_ids.end());
714 it = history_id_set.erase(it);
715 } 732 }
716 733
717 // Score the matches. 734 // Score the matches.
718 const size_t num_matches = history_id_set.size(); 735 const size_t num_matches = history_ids.size();
719 const base::Time now = base::Time::Now(); 736 const base::Time now = base::Time::Now();
720 std::transform( 737 std::transform(
721 history_id_set.begin(), history_id_set.end(), 738 history_ids.begin(), history_ids.end(), std::back_inserter(*scored_items),
722 std::back_inserter(*scored_items), [&](const HistoryID history_id) { 739 [&](const HistoryID history_id) {
723 auto hist_pos = history_info_map_.find(history_id); 740 auto hist_pos = history_info_map_.find(history_id);
724 const history::URLRow& hist_item = hist_pos->second.url_row; 741 const history::URLRow& hist_item = hist_pos->second.url_row;
725 auto starts_pos = word_starts_map_.find(history_id); 742 auto starts_pos = word_starts_map_.find(history_id);
726 DCHECK(starts_pos != word_starts_map_.end()); 743 DCHECK(starts_pos != word_starts_map_.end());
727 return ScoredHistoryMatch( 744 return ScoredHistoryMatch(
728 hist_item, hist_pos->second.visits, lower_raw_string, 745 hist_item, hist_pos->second.visits, lower_raw_string,
729 lower_raw_terms, lower_terms_to_word_starts_offsets, 746 lower_raw_terms, lower_terms_to_word_starts_offsets,
730 starts_pos->second, 747 starts_pos->second,
731 bookmark_model && bookmark_model->IsBookmarked(hist_item.url()), 748 bookmark_model && bookmark_model->IsBookmarked(hist_item.url()),
732 num_matches, now); 749 num_matches, now);
(...skipping 90 matching lines...) Expand 10 before | Expand all | Expand 10 after
823 HistoryID history_id = static_cast<HistoryID>(row.id()); 840 HistoryID history_id = static_cast<HistoryID>(row.id());
824 // Split URL into individual, unique words then add in the title words. 841 // Split URL into individual, unique words then add in the title words.
825 const GURL& gurl(row.url()); 842 const GURL& gurl(row.url());
826 const base::string16& url = 843 const base::string16& url =
827 bookmarks::CleanUpUrlForMatching(gurl, nullptr); 844 bookmarks::CleanUpUrlForMatching(gurl, nullptr);
828 String16Set url_words = String16SetFromString16(url, 845 String16Set url_words = String16SetFromString16(url,
829 word_starts ? &word_starts->url_word_starts_ : nullptr); 846 word_starts ? &word_starts->url_word_starts_ : nullptr);
830 const base::string16& title = bookmarks::CleanUpTitleForMatching(row.title()); 847 const base::string16& title = bookmarks::CleanUpTitleForMatching(row.title());
831 String16Set title_words = String16SetFromString16(title, 848 String16Set title_words = String16SetFromString16(title,
832 word_starts ? &word_starts->title_word_starts_ : nullptr); 849 word_starts ? &word_starts->title_word_starts_ : nullptr);
833 String16Set words = base::STLSetUnion<String16Set>(url_words, title_words); 850 for (const auto& word :
834 for (String16Set::iterator word_iter = words.begin(); 851 base::STLSetUnion<String16Set>(url_words, title_words))
835 word_iter != words.end(); ++word_iter) 852 AddWordToIndex(word, history_id);
836 AddWordToIndex(*word_iter, history_id);
837 853
838 search_term_cache_.clear(); // Invalidate the term cache. 854 search_term_cache_.clear(); // Invalidate the term cache.
839 } 855 }
840 856
841 void URLIndexPrivateData::AddWordToIndex(const base::string16& term, 857 void URLIndexPrivateData::AddWordToIndex(const base::string16& term,
842 HistoryID history_id) { 858 HistoryID history_id) {
843 WordMap::iterator word_pos = word_map_.find(term); 859 WordMap::iterator word_pos;
844 if (word_pos != word_map_.end()) 860 bool is_new;
845 UpdateWordHistory(word_pos->second, history_id); 861 std::tie(word_pos, is_new) = word_map_.emplace(term, WordID());
846 else 862
847 AddWordHistory(term, history_id); 863 // Adding a new word (i.e. a word that is not already in the word index).
864 if (is_new) {
865 word_pos->second = AddNewWordToWordList(term);
866
867 // For each character in the newly added word add the word to the character
868 // index.
869 for (base::char16 uni_char : Char16SetFromString16(term))
870 char_word_map_[uni_char].insert(word_pos->second);
871 }
872
873 word_id_history_map_[word_pos->second].insert(history_id);
874 history_id_word_map_[history_id].insert(word_pos->second);
848 } 875 }
849 876
850 void URLIndexPrivateData::AddWordHistory(const base::string16& term, 877 WordID URLIndexPrivateData::AddNewWordToWordList(const base::string16& term) {
851 HistoryID history_id) {
852 WordID word_id = word_list_.size(); 878 WordID word_id = word_list_.size();
853 if (available_words_.empty()) { 879 if (available_words_.empty()) {
854 word_list_.push_back(term); 880 word_list_.push_back(term);
855 } else { 881 return word_id;
856 word_id = *(available_words_.begin());
857 word_list_[word_id] = term;
858 available_words_.erase(word_id);
859 } 882 }
860 word_map_[term] = word_id;
861 883
862 HistoryIDSet history_id_set; 884 word_id = available_words_.top();
863 history_id_set.insert(history_id); 885 available_words_.pop();
864 word_id_history_map_[word_id] = history_id_set; 886 word_list_[word_id] = term;
865 AddToHistoryIDWordMap(history_id, word_id); 887 return word_id;
866
867 // For each character in the newly added word (i.e. a word that is not
868 // already in the word index), add the word to the character index.
869 Char16Set characters = Char16SetFromString16(term);
870 for (Char16Set::iterator uni_char_iter = characters.begin();
871 uni_char_iter != characters.end(); ++uni_char_iter) {
872 base::char16 uni_char = *uni_char_iter;
873 CharWordIDMap::iterator char_iter = char_word_map_.find(uni_char);
874 if (char_iter != char_word_map_.end()) {
875 // Update existing entry in the char/word index.
876 WordIDSet& word_id_set(char_iter->second);
877 word_id_set.insert(word_id);
878 } else {
879 // Create a new entry in the char/word index.
880 WordIDSet word_id_set;
881 word_id_set.insert(word_id);
882 char_word_map_[uni_char] = word_id_set;
883 }
884 }
885 }
886
887 void URLIndexPrivateData::UpdateWordHistory(WordID word_id,
888 HistoryID history_id) {
889 WordIDHistoryMap::iterator history_pos = word_id_history_map_.find(word_id);
890 DCHECK(history_pos != word_id_history_map_.end());
891 HistoryIDSet& history_id_set(history_pos->second);
892 history_id_set.insert(history_id);
893 AddToHistoryIDWordMap(history_id, word_id);
894 }
895
896 void URLIndexPrivateData::AddToHistoryIDWordMap(HistoryID history_id,
897 WordID word_id) {
898 HistoryIDWordMap::iterator iter = history_id_word_map_.find(history_id);
899 if (iter != history_id_word_map_.end()) {
900 WordIDSet& word_id_set(iter->second);
901 word_id_set.insert(word_id);
902 } else {
903 WordIDSet word_id_set;
904 word_id_set.insert(word_id);
905 history_id_word_map_[history_id] = word_id_set;
906 }
907 } 888 }
908 889
909 void URLIndexPrivateData::RemoveRowFromIndex(const history::URLRow& row) { 890 void URLIndexPrivateData::RemoveRowFromIndex(const history::URLRow& row) {
910 RemoveRowWordsFromIndex(row); 891 RemoveRowWordsFromIndex(row);
911 HistoryID history_id = static_cast<HistoryID>(row.id()); 892 HistoryID history_id = static_cast<HistoryID>(row.id());
912 history_info_map_.erase(history_id); 893 history_info_map_.erase(history_id);
913 word_starts_map_.erase(history_id); 894 word_starts_map_.erase(history_id);
914 } 895 }
915 896
916 void URLIndexPrivateData::RemoveRowWordsFromIndex(const history::URLRow& row) { 897 void URLIndexPrivateData::RemoveRowWordsFromIndex(const history::URLRow& row) {
917 // Remove the entries in history_id_word_map_ and word_id_history_map_ for 898 // Remove the entries in history_id_word_map_ and word_id_history_map_ for
918 // this row. 899 // this row.
919 HistoryID history_id = static_cast<HistoryID>(row.id()); 900 HistoryID history_id = static_cast<HistoryID>(row.id());
920 WordIDSet word_id_set = history_id_word_map_[history_id]; 901 WordIDSet word_id_set = history_id_word_map_[history_id];
921 history_id_word_map_.erase(history_id); 902 history_id_word_map_.erase(history_id);
922 903
923 // Reconcile any changes to word usage. 904 // Reconcile any changes to word usage.
924 for (WordIDSet::iterator word_id_iter = word_id_set.begin(); 905 for (WordID word_id : word_id_set) {
925 word_id_iter != word_id_set.end(); ++word_id_iter) { 906 auto word_id_history_map_iter = word_id_history_map_.find(word_id);
926 WordID word_id = *word_id_iter; 907 DCHECK(word_id_history_map_iter != word_id_history_map_.end());
927 word_id_history_map_[word_id].erase(history_id); 908
928 if (!word_id_history_map_[word_id].empty()) 909 word_id_history_map_iter->second.erase(history_id);
929 continue; // The word is still in use. 910 if (!word_id_history_map_iter->second.empty())
911 continue;
930 912
931 // The word is no longer in use. Reconcile any changes to character usage. 913 // The word is no longer in use. Reconcile any changes to character usage.
932 base::string16 word = word_list_[word_id]; 914 base::string16 word = word_list_[word_id];
933 Char16Set characters = Char16SetFromString16(word); 915 for (base::char16 uni_char : Char16SetFromString16(word)) {
934 for (Char16Set::iterator uni_char_iter = characters.begin(); 916 auto char_word_map_iter = char_word_map_.find(uni_char);
935 uni_char_iter != characters.end(); ++uni_char_iter) { 917 char_word_map_iter->second.erase(word_id);
936 base::char16 uni_char = *uni_char_iter; 918 if (char_word_map_iter->second.empty())
937 char_word_map_[uni_char].erase(word_id); 919 char_word_map_.erase(char_word_map_iter);
938 if (char_word_map_[uni_char].empty())
939 char_word_map_.erase(uni_char); // No longer in use.
940 } 920 }
941 921
942 // Complete the removal of references to the word. 922 // Complete the removal of references to the word.
943 word_id_history_map_.erase(word_id); 923 word_id_history_map_.erase(word_id_history_map_iter);
944 word_map_.erase(word); 924 word_map_.erase(word);
945 word_list_[word_id] = base::string16(); 925 word_list_[word_id] = base::string16();
946 available_words_.insert(word_id); 926 available_words_.push(word_id);
947 } 927 }
948 } 928 }
949 929
950 void URLIndexPrivateData::ResetSearchTermCache() { 930 void URLIndexPrivateData::ResetSearchTermCache() {
951 for (SearchTermCacheMap::iterator iter = search_term_cache_.begin(); 931 for (auto& item : search_term_cache_)
952 iter != search_term_cache_.end(); ++iter) 932 item.second.used_ = false;
953 iter->second.used_ = false;
954 } 933 }
955 934
956 bool URLIndexPrivateData::SaveToFile(const base::FilePath& file_path) { 935 bool URLIndexPrivateData::SaveToFile(const base::FilePath& file_path) {
957 base::TimeTicks beginning_time = base::TimeTicks::Now(); 936 base::TimeTicks beginning_time = base::TimeTicks::Now();
958 InMemoryURLIndexCacheItem index_cache; 937 InMemoryURLIndexCacheItem index_cache;
959 SavePrivateData(&index_cache); 938 SavePrivateData(&index_cache);
960 std::string data; 939 std::string data;
961 if (!index_cache.SerializeToString(&data)) { 940 if (!index_cache.SerializeToString(&data)) {
962 LOG(WARNING) << "Failed to serialize the InMemoryURLIndex cache."; 941 LOG(WARNING) << "Failed to serialize the InMemoryURLIndex cache.";
963 return false; 942 return false;
(...skipping 24 matching lines...) Expand all
988 SaveWordIDHistoryMap(cache); 967 SaveWordIDHistoryMap(cache);
989 SaveHistoryInfoMap(cache); 968 SaveHistoryInfoMap(cache);
990 SaveWordStartsMap(cache); 969 SaveWordStartsMap(cache);
991 } 970 }
992 971
993 void URLIndexPrivateData::SaveWordList(InMemoryURLIndexCacheItem* cache) const { 972 void URLIndexPrivateData::SaveWordList(InMemoryURLIndexCacheItem* cache) const {
994 if (word_list_.empty()) 973 if (word_list_.empty())
995 return; 974 return;
996 WordListItem* list_item = cache->mutable_word_list(); 975 WordListItem* list_item = cache->mutable_word_list();
997 list_item->set_word_count(word_list_.size()); 976 list_item->set_word_count(word_list_.size());
998 for (String16Vector::const_iterator iter = word_list_.begin(); 977 for (const base::string16& word : word_list_)
999 iter != word_list_.end(); ++iter) 978 list_item->add_word(base::UTF16ToUTF8(word));
1000 list_item->add_word(base::UTF16ToUTF8(*iter));
1001 } 979 }
1002 980
1003 void URLIndexPrivateData::SaveWordMap(InMemoryURLIndexCacheItem* cache) const { 981 void URLIndexPrivateData::SaveWordMap(InMemoryURLIndexCacheItem* cache) const {
1004 if (word_map_.empty()) 982 if (word_map_.empty())
1005 return; 983 return;
1006 WordMapItem* map_item = cache->mutable_word_map(); 984 WordMapItem* map_item = cache->mutable_word_map();
1007 map_item->set_item_count(word_map_.size()); 985 map_item->set_item_count(word_map_.size());
1008 for (WordMap::const_iterator iter = word_map_.begin(); 986 for (const auto& elem : word_map_) {
1009 iter != word_map_.end(); ++iter) {
1010 WordMapEntry* map_entry = map_item->add_word_map_entry(); 987 WordMapEntry* map_entry = map_item->add_word_map_entry();
1011 map_entry->set_word(base::UTF16ToUTF8(iter->first)); 988 map_entry->set_word(base::UTF16ToUTF8(elem.first));
1012 map_entry->set_word_id(iter->second); 989 map_entry->set_word_id(elem.second);
1013 } 990 }
1014 } 991 }
1015 992
1016 void URLIndexPrivateData::SaveCharWordMap( 993 void URLIndexPrivateData::SaveCharWordMap(
1017 InMemoryURLIndexCacheItem* cache) const { 994 InMemoryURLIndexCacheItem* cache) const {
1018 if (char_word_map_.empty()) 995 if (char_word_map_.empty())
1019 return; 996 return;
1020 CharWordMapItem* map_item = cache->mutable_char_word_map(); 997 CharWordMapItem* map_item = cache->mutable_char_word_map();
1021 map_item->set_item_count(char_word_map_.size()); 998 map_item->set_item_count(char_word_map_.size());
1022 for (CharWordIDMap::const_iterator iter = char_word_map_.begin(); 999 for (const auto& entry : char_word_map_) {
1023 iter != char_word_map_.end(); ++iter) {
1024 CharWordMapEntry* map_entry = map_item->add_char_word_map_entry(); 1000 CharWordMapEntry* map_entry = map_item->add_char_word_map_entry();
1025 map_entry->set_char_16(iter->first); 1001 map_entry->set_char_16(entry.first);
1026 const WordIDSet& word_id_set(iter->second); 1002 const WordIDSet& word_id_set(entry.second);
1027 map_entry->set_item_count(word_id_set.size()); 1003 map_entry->set_item_count(word_id_set.size());
1028 for (WordIDSet::const_iterator set_iter = word_id_set.begin(); 1004 for (WordID word_id : word_id_set)
1029 set_iter != word_id_set.end(); ++set_iter) 1005 map_entry->add_word_id(word_id);
1030 map_entry->add_word_id(*set_iter);
1031 } 1006 }
1032 } 1007 }
1033 1008
1034 void URLIndexPrivateData::SaveWordIDHistoryMap( 1009 void URLIndexPrivateData::SaveWordIDHistoryMap(
1035 InMemoryURLIndexCacheItem* cache) const { 1010 InMemoryURLIndexCacheItem* cache) const {
1036 if (word_id_history_map_.empty()) 1011 if (word_id_history_map_.empty())
1037 return; 1012 return;
1038 WordIDHistoryMapItem* map_item = cache->mutable_word_id_history_map(); 1013 WordIDHistoryMapItem* map_item = cache->mutable_word_id_history_map();
1039 map_item->set_item_count(word_id_history_map_.size()); 1014 map_item->set_item_count(word_id_history_map_.size());
1040 for (WordIDHistoryMap::const_iterator iter = word_id_history_map_.begin(); 1015 for (const auto& entry : word_id_history_map_) {
1041 iter != word_id_history_map_.end(); ++iter) {
1042 WordIDHistoryMapEntry* map_entry = 1016 WordIDHistoryMapEntry* map_entry =
1043 map_item->add_word_id_history_map_entry(); 1017 map_item->add_word_id_history_map_entry();
1044 map_entry->set_word_id(iter->first); 1018 map_entry->set_word_id(entry.first);
1045 const HistoryIDSet& history_id_set(iter->second); 1019 const HistoryIDSet& history_id_set(entry.second);
1046 map_entry->set_item_count(history_id_set.size()); 1020 map_entry->set_item_count(history_id_set.size());
1047 for (HistoryIDSet::const_iterator set_iter = history_id_set.begin(); 1021 for (HistoryID history_id : history_id_set)
1048 set_iter != history_id_set.end(); ++set_iter) 1022 map_entry->add_history_id(history_id);
1049 map_entry->add_history_id(*set_iter);
1050 } 1023 }
1051 } 1024 }
1052 1025
1053 void URLIndexPrivateData::SaveHistoryInfoMap( 1026 void URLIndexPrivateData::SaveHistoryInfoMap(
1054 InMemoryURLIndexCacheItem* cache) const { 1027 InMemoryURLIndexCacheItem* cache) const {
1055 if (history_info_map_.empty()) 1028 if (history_info_map_.empty())
1056 return; 1029 return;
1057 HistoryInfoMapItem* map_item = cache->mutable_history_info_map(); 1030 HistoryInfoMapItem* map_item = cache->mutable_history_info_map();
1058 map_item->set_item_count(history_info_map_.size()); 1031 map_item->set_item_count(history_info_map_.size());
1059 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin(); 1032 for (const auto& entry : history_info_map_) {
1060 iter != history_info_map_.end(); ++iter) {
1061 HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry(); 1033 HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry();
1062 map_entry->set_history_id(iter->first); 1034 map_entry->set_history_id(entry.first);
1063 const history::URLRow& url_row(iter->second.url_row); 1035 const history::URLRow& url_row(entry.second.url_row);
1064 // Note: We only save information that contributes to the index so there 1036 // Note: We only save information that contributes to the index so there
1065 // is no need to save search_term_cache_ (not persistent). 1037 // is no need to save search_term_cache_ (not persistent).
1066 map_entry->set_visit_count(url_row.visit_count()); 1038 map_entry->set_visit_count(url_row.visit_count());
1067 map_entry->set_typed_count(url_row.typed_count()); 1039 map_entry->set_typed_count(url_row.typed_count());
1068 map_entry->set_last_visit(url_row.last_visit().ToInternalValue()); 1040 map_entry->set_last_visit(url_row.last_visit().ToInternalValue());
1069 map_entry->set_url(url_row.url().spec()); 1041 map_entry->set_url(url_row.url().spec());
1070 map_entry->set_title(base::UTF16ToUTF8(url_row.title())); 1042 map_entry->set_title(base::UTF16ToUTF8(url_row.title()));
1071 const VisitInfoVector& visits(iter->second.visits); 1043 for (const auto& visit : entry.second.visits) {
1072 for (VisitInfoVector::const_iterator visit_iter = visits.begin();
1073 visit_iter != visits.end(); ++visit_iter) {
1074 HistoryInfoMapEntry_VisitInfo* visit_info = map_entry->add_visits(); 1044 HistoryInfoMapEntry_VisitInfo* visit_info = map_entry->add_visits();
1075 visit_info->set_visit_time(visit_iter->first.ToInternalValue()); 1045 visit_info->set_visit_time(visit.first.ToInternalValue());
1076 visit_info->set_transition_type(visit_iter->second); 1046 visit_info->set_transition_type(visit.second);
1077 } 1047 }
1078 } 1048 }
1079 } 1049 }
1080 1050
1081 void URLIndexPrivateData::SaveWordStartsMap( 1051 void URLIndexPrivateData::SaveWordStartsMap(
1082 InMemoryURLIndexCacheItem* cache) const { 1052 InMemoryURLIndexCacheItem* cache) const {
1083 if (word_starts_map_.empty()) 1053 if (word_starts_map_.empty())
1084 return; 1054 return;
1085 // For unit testing: Enable saving of the cache as an earlier version to 1055 // For unit testing: Enable saving of the cache as an earlier version to
1086 // allow testing of cache file upgrading in ReadFromFile(). 1056 // allow testing of cache file upgrading in ReadFromFile().
1087 // TODO(mrossetti): Instead of intruding on production code with this kind of 1057 // TODO(mrossetti): Instead of intruding on production code with this kind of
1088 // test harness, save a copy of an older version cache with known results. 1058 // test harness, save a copy of an older version cache with known results.
1089 // Implement this when switching the caching over to SQLite. 1059 // Implement this when switching the caching over to SQLite.
1090 if (saved_cache_version_ < 1) 1060 if (saved_cache_version_ < 1)
1091 return; 1061 return;
1092 1062
1093 WordStartsMapItem* map_item = cache->mutable_word_starts_map(); 1063 WordStartsMapItem* map_item = cache->mutable_word_starts_map();
1094 map_item->set_item_count(word_starts_map_.size()); 1064 map_item->set_item_count(word_starts_map_.size());
1095 for (WordStartsMap::const_iterator iter = word_starts_map_.begin(); 1065 for (const auto& entry : word_starts_map_) {
1096 iter != word_starts_map_.end(); ++iter) {
1097 WordStartsMapEntry* map_entry = map_item->add_word_starts_map_entry(); 1066 WordStartsMapEntry* map_entry = map_item->add_word_starts_map_entry();
1098 map_entry->set_history_id(iter->first); 1067 map_entry->set_history_id(entry.first);
1099 const RowWordStarts& word_starts(iter->second); 1068 const RowWordStarts& word_starts(entry.second);
1100 for (WordStarts::const_iterator i = word_starts.url_word_starts_.begin(); 1069 for (auto url_word_start : word_starts.url_word_starts_)
1101 i != word_starts.url_word_starts_.end(); ++i) 1070 map_entry->add_url_word_starts(url_word_start);
1102 map_entry->add_url_word_starts(*i); 1071 for (auto title_word_start : word_starts.title_word_starts_)
1103 for (WordStarts::const_iterator i = word_starts.title_word_starts_.begin(); 1072 map_entry->add_title_word_starts(title_word_start);
1104 i != word_starts.title_word_starts_.end(); ++i)
1105 map_entry->add_title_word_starts(*i);
1106 } 1073 }
1107 } 1074 }
1108 1075
1109 bool URLIndexPrivateData::RestorePrivateData( 1076 bool URLIndexPrivateData::RestorePrivateData(
1110 const InMemoryURLIndexCacheItem& cache) { 1077 const InMemoryURLIndexCacheItem& cache) {
1111 last_time_rebuilt_from_history_ = 1078 last_time_rebuilt_from_history_ =
1112 base::Time::FromInternalValue(cache.last_rebuild_timestamp()); 1079 base::Time::FromInternalValue(cache.last_rebuild_timestamp());
1113 const base::TimeDelta rebuilt_ago = 1080 const base::TimeDelta rebuilt_ago =
1114 base::Time::Now() - last_time_rebuilt_from_history_; 1081 base::Time::Now() - last_time_rebuilt_from_history_;
1115 if ((rebuilt_ago > base::TimeDelta::FromDays(7)) || 1082 if ((rebuilt_ago > base::TimeDelta::FromDays(7)) ||
(...skipping 22 matching lines...) Expand all
1138 bool URLIndexPrivateData::RestoreWordList( 1105 bool URLIndexPrivateData::RestoreWordList(
1139 const InMemoryURLIndexCacheItem& cache) { 1106 const InMemoryURLIndexCacheItem& cache) {
1140 if (!cache.has_word_list()) 1107 if (!cache.has_word_list())
1141 return false; 1108 return false;
1142 const WordListItem& list_item(cache.word_list()); 1109 const WordListItem& list_item(cache.word_list());
1143 uint32_t expected_item_count = list_item.word_count(); 1110 uint32_t expected_item_count = list_item.word_count();
1144 uint32_t actual_item_count = list_item.word_size(); 1111 uint32_t actual_item_count = list_item.word_size();
1145 if (actual_item_count == 0 || actual_item_count != expected_item_count) 1112 if (actual_item_count == 0 || actual_item_count != expected_item_count)
1146 return false; 1113 return false;
1147 const RepeatedPtrField<std::string>& words(list_item.word()); 1114 const RepeatedPtrField<std::string>& words(list_item.word());
1148 for (RepeatedPtrField<std::string>::const_iterator iter = words.begin(); 1115 word_list_.reserve(words.size());
1149 iter != words.end(); ++iter) 1116 std::transform(
1150 word_list_.push_back(base::UTF8ToUTF16(*iter)); 1117 words.begin(), words.end(), std::back_inserter(word_list_),
1118 [](const std::string& word) { return base::UTF8ToUTF16(word); });
1151 return true; 1119 return true;
1152 } 1120 }
1153 1121
1154 bool URLIndexPrivateData::RestoreWordMap( 1122 bool URLIndexPrivateData::RestoreWordMap(
1155 const InMemoryURLIndexCacheItem& cache) { 1123 const InMemoryURLIndexCacheItem& cache) {
1156 if (!cache.has_word_map()) 1124 if (!cache.has_word_map())
1157 return false; 1125 return false;
1158 const WordMapItem& list_item(cache.word_map()); 1126 const WordMapItem& list_item(cache.word_map());
1159 uint32_t expected_item_count = list_item.item_count(); 1127 uint32_t expected_item_count = list_item.item_count();
1160 uint32_t actual_item_count = list_item.word_map_entry_size(); 1128 uint32_t actual_item_count = list_item.word_map_entry_size();
1161 if (actual_item_count == 0 || actual_item_count != expected_item_count) 1129 if (actual_item_count == 0 || actual_item_count != expected_item_count)
1162 return false; 1130 return false;
1163 const RepeatedPtrField<WordMapEntry>& entries(list_item.word_map_entry()); 1131 for (const auto& entry : list_item.word_map_entry())
1164 for (RepeatedPtrField<WordMapEntry>::const_iterator iter = entries.begin(); 1132 word_map_[base::UTF8ToUTF16(entry.word())] = entry.word_id();
1165 iter != entries.end(); ++iter) 1133
1166 word_map_[base::UTF8ToUTF16(iter->word())] = iter->word_id();
1167 return true; 1134 return true;
1168 } 1135 }
1169 1136
1170 bool URLIndexPrivateData::RestoreCharWordMap( 1137 bool URLIndexPrivateData::RestoreCharWordMap(
1171 const InMemoryURLIndexCacheItem& cache) { 1138 const InMemoryURLIndexCacheItem& cache) {
1172 if (!cache.has_char_word_map()) 1139 if (!cache.has_char_word_map())
1173 return false; 1140 return false;
1174 const CharWordMapItem& list_item(cache.char_word_map()); 1141 const CharWordMapItem& list_item(cache.char_word_map());
1175 uint32_t expected_item_count = list_item.item_count(); 1142 uint32_t expected_item_count = list_item.item_count();
1176 uint32_t actual_item_count = list_item.char_word_map_entry_size(); 1143 uint32_t actual_item_count = list_item.char_word_map_entry_size();
1177 if (actual_item_count == 0 || actual_item_count != expected_item_count) 1144 if (actual_item_count == 0 || actual_item_count != expected_item_count)
1178 return false; 1145 return false;
1179 const RepeatedPtrField<CharWordMapEntry>& 1146
1180 entries(list_item.char_word_map_entry()); 1147 for (const auto& entry : list_item.char_word_map_entry()) {
1181 for (RepeatedPtrField<CharWordMapEntry>::const_iterator iter = 1148 expected_item_count = entry.item_count();
1182 entries.begin(); iter != entries.end(); ++iter) { 1149 actual_item_count = entry.word_id_size();
1183 expected_item_count = iter->item_count();
1184 actual_item_count = iter->word_id_size();
1185 if (actual_item_count == 0 || actual_item_count != expected_item_count) 1150 if (actual_item_count == 0 || actual_item_count != expected_item_count)
1186 return false; 1151 return false;
1187 base::char16 uni_char = static_cast<base::char16>(iter->char_16()); 1152 base::char16 uni_char = static_cast<base::char16>(entry.char_16());
1188 WordIDSet word_id_set; 1153 const RepeatedField<int32_t>& word_ids(entry.word_id());
Peter Kasting 2017/02/25 04:54:41 Nit: Use = rather than () to init references like
dyaroshev 2017/02/26 01:12:16 Done.
1189 const RepeatedField<int32_t>& word_ids(iter->word_id()); 1154 char_word_map_[uni_char] = {word_ids.begin(), word_ids.end()};
1190 for (RepeatedField<int32_t>::const_iterator jiter = word_ids.begin();
1191 jiter != word_ids.end(); ++jiter)
1192 word_id_set.insert(*jiter);
1193 char_word_map_[uni_char] = word_id_set;
1194 } 1155 }
1195 return true; 1156 return true;
1196 } 1157 }
1197 1158
1198 bool URLIndexPrivateData::RestoreWordIDHistoryMap( 1159 bool URLIndexPrivateData::RestoreWordIDHistoryMap(
1199 const InMemoryURLIndexCacheItem& cache) { 1160 const InMemoryURLIndexCacheItem& cache) {
1200 if (!cache.has_word_id_history_map()) 1161 if (!cache.has_word_id_history_map())
1201 return false; 1162 return false;
1202 const WordIDHistoryMapItem& list_item(cache.word_id_history_map()); 1163 const WordIDHistoryMapItem& list_item(cache.word_id_history_map());
1203 uint32_t expected_item_count = list_item.item_count(); 1164 uint32_t expected_item_count = list_item.item_count();
1204 uint32_t actual_item_count = list_item.word_id_history_map_entry_size(); 1165 uint32_t actual_item_count = list_item.word_id_history_map_entry_size();
1205 if (actual_item_count == 0 || actual_item_count != expected_item_count) 1166 if (actual_item_count == 0 || actual_item_count != expected_item_count)
1206 return false; 1167 return false;
1207 const RepeatedPtrField<WordIDHistoryMapEntry>& 1168 for (const auto& entry : list_item.word_id_history_map_entry()) {
1208 entries(list_item.word_id_history_map_entry()); 1169 expected_item_count = entry.item_count();
1209 for (RepeatedPtrField<WordIDHistoryMapEntry>::const_iterator iter = 1170 actual_item_count = entry.history_id_size();
1210 entries.begin(); iter != entries.end(); ++iter) {
1211 expected_item_count = iter->item_count();
1212 actual_item_count = iter->history_id_size();
1213 if (actual_item_count == 0 || actual_item_count != expected_item_count) 1171 if (actual_item_count == 0 || actual_item_count != expected_item_count)
1214 return false; 1172 return false;
1215 WordID word_id = iter->word_id(); 1173 WordID word_id = entry.word_id();
1216 HistoryIDSet history_id_set; 1174 const RepeatedField<int64_t>& history_ids(entry.history_id());
1217 const RepeatedField<int64_t>& history_ids(iter->history_id()); 1175 word_id_history_map_[word_id] = {history_ids.begin(), history_ids.end()};
1218 for (RepeatedField<int64_t>::const_iterator jiter = history_ids.begin(); 1176 for (HistoryID history_id : history_ids)
1219 jiter != history_ids.end(); ++jiter) { 1177 history_id_word_map_[history_id].insert(word_id);
1220 history_id_set.insert(*jiter);
1221 AddToHistoryIDWordMap(*jiter, word_id);
1222 }
1223 word_id_history_map_[word_id] = history_id_set;
1224 } 1178 }
1225 return true; 1179 return true;
1226 } 1180 }
1227 1181
1228 bool URLIndexPrivateData::RestoreHistoryInfoMap( 1182 bool URLIndexPrivateData::RestoreHistoryInfoMap(
1229 const InMemoryURLIndexCacheItem& cache) { 1183 const InMemoryURLIndexCacheItem& cache) {
1230 if (!cache.has_history_info_map()) 1184 if (!cache.has_history_info_map())
1231 return false; 1185 return false;
1232 const HistoryInfoMapItem& list_item(cache.history_info_map()); 1186 const HistoryInfoMapItem& list_item(cache.history_info_map());
1233 uint32_t expected_item_count = list_item.item_count(); 1187 uint32_t expected_item_count = list_item.item_count();
1234 uint32_t actual_item_count = list_item.history_info_map_entry_size(); 1188 uint32_t actual_item_count = list_item.history_info_map_entry_size();
1235 if (actual_item_count == 0 || actual_item_count != expected_item_count) 1189 if (actual_item_count == 0 || actual_item_count != expected_item_count)
1236 return false; 1190 return false;
1237 const RepeatedPtrField<HistoryInfoMapEntry>& 1191
1238 entries(list_item.history_info_map_entry()); 1192 for (const auto& entry : list_item.history_info_map_entry()) {
1239 for (RepeatedPtrField<HistoryInfoMapEntry>::const_iterator iter = 1193 HistoryID history_id = entry.history_id();
1240 entries.begin(); iter != entries.end(); ++iter) { 1194 history::URLRow url_row(GURL(entry.url()), history_id);
1241 HistoryID history_id = iter->history_id(); 1195 url_row.set_visit_count(entry.visit_count());
1242 GURL url(iter->url()); 1196 url_row.set_typed_count(entry.typed_count());
1243 history::URLRow url_row(url, history_id); 1197 url_row.set_last_visit(base::Time::FromInternalValue(entry.last_visit()));
1244 url_row.set_visit_count(iter->visit_count()); 1198 if (entry.has_title())
1245 url_row.set_typed_count(iter->typed_count()); 1199 url_row.set_title(base::UTF8ToUTF16(entry.title()));
1246 url_row.set_last_visit(base::Time::FromInternalValue(iter->last_visit())); 1200 history_info_map_[history_id].url_row = std::move(url_row);
1247 if (iter->has_title()) {
1248 base::string16 title(base::UTF8ToUTF16(iter->title()));
1249 url_row.set_title(title);
1250 }
1251 history_info_map_[history_id].url_row = url_row;
1252 1201
1253 // Restore visits list. 1202 // Restore visits list.
1254 VisitInfoVector visits; 1203 VisitInfoVector visits;
1255 visits.reserve(iter->visits_size()); 1204 visits.reserve(entry.visits_size());
1256 for (int i = 0; i < iter->visits_size(); ++i) { 1205 for (const auto& entry_visit : entry.visits()) {
1257 visits.push_back(std::make_pair( 1206 visits.emplace_back(
1258 base::Time::FromInternalValue(iter->visits(i).visit_time()), 1207 base::Time::FromInternalValue(entry_visit.visit_time()),
1259 ui::PageTransitionFromInt(iter->visits(i).transition_type()))); 1208 ui::PageTransitionFromInt(entry_visit.transition_type()));
1260 } 1209 }
1261 history_info_map_[history_id].visits = visits; 1210 history_info_map_[history_id].visits = std::move(visits);
1262 } 1211 }
1263 return true; 1212 return true;
1264 } 1213 }
1265 1214
1266 bool URLIndexPrivateData::RestoreWordStartsMap( 1215 bool URLIndexPrivateData::RestoreWordStartsMap(
1267 const InMemoryURLIndexCacheItem& cache) { 1216 const InMemoryURLIndexCacheItem& cache) {
1268 // Note that this function must be called after RestoreHistoryInfoMap() has 1217 // Note that this function must be called after RestoreHistoryInfoMap() has
1269 // been run as the word starts may have to be recalculated from the urls and 1218 // been run as the word starts may have to be recalculated from the urls and
1270 // page titles. 1219 // page titles.
1271 if (cache.has_word_starts_map()) { 1220 if (cache.has_word_starts_map()) {
1272 const WordStartsMapItem& list_item(cache.word_starts_map()); 1221 const WordStartsMapItem& list_item(cache.word_starts_map());
1273 uint32_t expected_item_count = list_item.item_count(); 1222 uint32_t expected_item_count = list_item.item_count();
1274 uint32_t actual_item_count = list_item.word_starts_map_entry_size(); 1223 uint32_t actual_item_count = list_item.word_starts_map_entry_size();
1275 if (actual_item_count == 0 || actual_item_count != expected_item_count) 1224 if (actual_item_count == 0 || actual_item_count != expected_item_count)
1276 return false; 1225 return false;
1277 const RepeatedPtrField<WordStartsMapEntry>& 1226 for (const auto& entry : list_item.word_starts_map_entry()) {
1278 entries(list_item.word_starts_map_entry()); 1227 HistoryID history_id = entry.history_id();
1279 for (RepeatedPtrField<WordStartsMapEntry>::const_iterator iter =
1280 entries.begin(); iter != entries.end(); ++iter) {
1281 HistoryID history_id = iter->history_id();
1282 RowWordStarts word_starts; 1228 RowWordStarts word_starts;
1283 // Restore the URL word starts. 1229 // Restore the URL word starts.
1284 const RepeatedField<int32_t>& url_starts(iter->url_word_starts()); 1230 const RepeatedField<int32_t>& url_starts(entry.url_word_starts());
1285 for (RepeatedField<int32_t>::const_iterator jiter = url_starts.begin(); 1231 word_starts.url_word_starts_ = {url_starts.begin(), url_starts.end()};
1286 jiter != url_starts.end(); ++jiter) 1232
1287 word_starts.url_word_starts_.push_back(*jiter);
1288 // Restore the page title word starts. 1233 // Restore the page title word starts.
1289 const RepeatedField<int32_t>& title_starts(iter->title_word_starts()); 1234 const RepeatedField<int32_t>& title_starts(entry.title_word_starts());
1290 for (RepeatedField<int32_t>::const_iterator jiter = title_starts.begin(); 1235 word_starts.title_word_starts_ = {title_starts.begin(),
1291 jiter != title_starts.end(); ++jiter) 1236 title_starts.end()};
1292 word_starts.title_word_starts_.push_back(*jiter); 1237
1293 word_starts_map_[history_id] = word_starts; 1238 word_starts_map_[history_id] = std::move(word_starts);
1294 } 1239 }
1295 } else { 1240 } else {
1296 // Since the cache did not contain any word starts we must rebuild then from 1241 // Since the cache did not contain any word starts we must rebuild then from
1297 // the URL and page titles. 1242 // the URL and page titles.
1298 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin(); 1243 for (const auto& entry : history_info_map_) {
1299 iter != history_info_map_.end(); ++iter) {
1300 RowWordStarts word_starts; 1244 RowWordStarts word_starts;
1301 const history::URLRow& row(iter->second.url_row); 1245 const history::URLRow& row(entry.second.url_row);
1302 const base::string16& url = 1246 const base::string16& url =
1303 bookmarks::CleanUpUrlForMatching(row.url(), nullptr); 1247 bookmarks::CleanUpUrlForMatching(row.url(), nullptr);
1304 String16VectorFromString16(url, false, &word_starts.url_word_starts_); 1248 String16VectorFromString16(url, false, &word_starts.url_word_starts_);
1305 const base::string16& title = 1249 const base::string16& title =
1306 bookmarks::CleanUpTitleForMatching(row.title()); 1250 bookmarks::CleanUpTitleForMatching(row.title());
1307 String16VectorFromString16(title, false, &word_starts.title_word_starts_); 1251 String16VectorFromString16(title, false, &word_starts.title_word_starts_);
1308 word_starts_map_[iter->first] = word_starts; 1252 word_starts_map_[entry.first] = std::move(word_starts);
1309 } 1253 }
1310 } 1254 }
1311 return true; 1255 return true;
1312 } 1256 }
1313 1257
1314 // static 1258 // static
1315 bool URLIndexPrivateData::URLSchemeIsWhitelisted( 1259 bool URLIndexPrivateData::URLSchemeIsWhitelisted(
1316 const GURL& gurl, 1260 const GURL& gurl,
1317 const std::set<std::string>& whitelist) { 1261 const std::set<std::string>& whitelist) {
1318 return whitelist.find(gurl.scheme()) != whitelist.end(); 1262 return whitelist.find(gurl.scheme()) != whitelist.end();
(...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after
1381 // First cut: typed count, visit count, recency. 1325 // First cut: typed count, visit count, recency.
1382 // TODO(mrossetti): This is too simplistic. Consider an approach which ranks 1326 // TODO(mrossetti): This is too simplistic. Consider an approach which ranks
1383 // recently visited (within the last 12/24 hours) as highly important. Get 1327 // recently visited (within the last 12/24 hours) as highly important. Get
1384 // input from mpearson. 1328 // input from mpearson.
1385 if (r1.typed_count() != r2.typed_count()) 1329 if (r1.typed_count() != r2.typed_count())
1386 return (r1.typed_count() > r2.typed_count()); 1330 return (r1.typed_count() > r2.typed_count());
1387 if (r1.visit_count() != r2.visit_count()) 1331 if (r1.visit_count() != r2.visit_count())
1388 return (r1.visit_count() > r2.visit_count()); 1332 return (r1.visit_count() > r2.visit_count());
1389 return (r1.last_visit() > r2.last_visit()); 1333 return (r1.last_visit() > r2.last_visit());
1390 } 1334 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698