Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(142)

Side by Side Diff: components/omnibox/browser/url_index_private_data.cc

Issue 2690303012: Cleaning up url_index_private_data and in_memory_url_index_types. (Closed)
Patch Set: Review, round 4. Created 3 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « components/omnibox/browser/url_index_private_data.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/omnibox/browser/url_index_private_data.h" 5 #include "components/omnibox/browser/url_index_private_data.h"
6 6
7 #include <stdint.h> 7 #include <stdint.h>
8 8
9 #include <functional> 9 #include <functional>
10 #include <iterator> 10 #include <iterator>
(...skipping 19 matching lines...) Expand all
30 #include "components/omnibox/browser/in_memory_url_index.h" 30 #include "components/omnibox/browser/in_memory_url_index.h"
31 #include "components/search_engines/template_url_service.h" 31 #include "components/search_engines/template_url_service.h"
32 #include "components/url_formatter/url_formatter.h" 32 #include "components/url_formatter/url_formatter.h"
33 33
34 #if defined(USE_SYSTEM_PROTOBUF) 34 #if defined(USE_SYSTEM_PROTOBUF)
35 #include <google/protobuf/repeated_field.h> 35 #include <google/protobuf/repeated_field.h>
36 #else 36 #else
37 #include "third_party/protobuf/src/google/protobuf/repeated_field.h" 37 #include "third_party/protobuf/src/google/protobuf/repeated_field.h"
38 #endif 38 #endif
39 39
40 namespace {
41
40 using google::protobuf::RepeatedField; 42 using google::protobuf::RepeatedField;
41 using google::protobuf::RepeatedPtrField; 43 using google::protobuf::RepeatedPtrField;
42 using in_memory_url_index::InMemoryURLIndexCacheItem; 44 using in_memory_url_index::InMemoryURLIndexCacheItem;
43 45
44 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordListItem 46 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordListItem
45 WordListItem; 47 WordListItem;
46 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordMapItem_WordMapEntry 48 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordMapItem_WordMapEntry
47 WordMapEntry; 49 WordMapEntry;
48 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordMapItem WordMapItem; 50 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordMapItem WordMapItem;
49 typedef in_memory_url_index::InMemoryURLIndexCacheItem_CharWordMapItem 51 typedef in_memory_url_index::InMemoryURLIndexCacheItem_CharWordMapItem
(...skipping 20 matching lines...) Expand all
70 WordStartsMapEntry; 72 WordStartsMapEntry;
71 73
72 // Algorithm Functions --------------------------------------------------------- 74 // Algorithm Functions ---------------------------------------------------------
73 75
74 // Comparison function for sorting search terms by descending length. 76 // Comparison function for sorting search terms by descending length.
75 bool LengthGreater(const base::string16& string_a, 77 bool LengthGreater(const base::string16& string_a,
76 const base::string16& string_b) { 78 const base::string16& string_b) {
77 return string_a.length() > string_b.length(); 79 return string_a.length() > string_b.length();
78 } 80 }
79 81
82 } // namespace
80 83
81 // UpdateRecentVisitsFromHistoryDBTask ----------------------------------------- 84 // UpdateRecentVisitsFromHistoryDBTask -----------------------------------------
82 85
83 // HistoryDBTask used to update the recent visit data for a particular 86 // HistoryDBTask used to update the recent visit data for a particular
84 // row from the history database. 87 // row from the history database.
85 class UpdateRecentVisitsFromHistoryDBTask : public history::HistoryDBTask { 88 class UpdateRecentVisitsFromHistoryDBTask : public history::HistoryDBTask {
86 public: 89 public:
87 explicit UpdateRecentVisitsFromHistoryDBTask( 90 explicit UpdateRecentVisitsFromHistoryDBTask(
88 URLIndexPrivateData* private_data, 91 URLIndexPrivateData* private_data,
89 history::URLID url_id); 92 history::URLID url_id);
(...skipping 90 matching lines...) Expand 10 before | Expand all | Expand 10 after
180 // the index we need individual, lower-cased words, ignoring escapings. For 183 // the index we need individual, lower-cased words, ignoring escapings. For
181 // the final filtering we need whitespace separated substrings possibly 184 // the final filtering we need whitespace separated substrings possibly
182 // containing escaped characters. 185 // containing escaped characters.
183 base::string16 lower_raw_string(base::i18n::ToLower(search_string)); 186 base::string16 lower_raw_string(base::i18n::ToLower(search_string));
184 base::string16 lower_unescaped_string = net::UnescapeURLComponent( 187 base::string16 lower_unescaped_string = net::UnescapeURLComponent(
185 lower_raw_string, 188 lower_raw_string,
186 net::UnescapeRule::SPACES | net::UnescapeRule::PATH_SEPARATORS | 189 net::UnescapeRule::SPACES | net::UnescapeRule::PATH_SEPARATORS |
187 net::UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS); 190 net::UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS);
188 191
189 // Extract individual 'words' (as opposed to 'terms'; see comment in 192 // Extract individual 'words' (as opposed to 'terms'; see comment in
190 // HistoryIdSetToScoredMatches()) from the search string. When the user 193 // HistoryIdsToScoredMatches()) from the search string. When the user types
191 // types "colspec=ID%20Mstone Release" we get four 'words': "colspec", "id", 194 // "colspec=ID%20Mstone Release" we get four 'words': "colspec", "id",
192 // "mstone" and "release". 195 // "mstone" and "release".
193 String16Vector lower_words( 196 String16Vector lower_words(
194 String16VectorFromString16(lower_unescaped_string, false, nullptr)); 197 String16VectorFromString16(lower_unescaped_string, false, nullptr));
195 if (lower_words.empty()) 198 if (lower_words.empty())
196 continue; 199 continue;
197 HistoryIDSet history_id_set = HistoryIDSetFromWords(lower_words);
198 history_ids_were_trimmed |= TrimHistoryIdsPool(&history_id_set);
199 200
200 ScoredHistoryMatches temp_scored_items; 201 HistoryIDVector history_ids = HistoryIDsFromWords(lower_words);
201 HistoryIdSetToScoredMatches(history_id_set, lower_raw_string, 202 history_ids_were_trimmed |= TrimHistoryIdsPool(&history_ids);
202 template_url_service, bookmark_model, 203
203 &temp_scored_items); 204 HistoryIdsToScoredMatches(std::move(history_ids), lower_raw_string,
204 scored_items.insert(scored_items.end(), temp_scored_items.begin(), 205 template_url_service, bookmark_model,
205 temp_scored_items.end()); 206 &scored_items);
dyaroshev 2017/02/26 01:12:17 In fixed version of HistoryIdsToScoredMatches() we
206 } 207 }
207 // Select and sort only the top |max_matches| results. 208 // Select and sort only the top |max_matches| results.
208 if (scored_items.size() > max_matches) { 209 if (scored_items.size() > max_matches) {
209 std::partial_sort(scored_items.begin(), scored_items.begin() + max_matches, 210 std::partial_sort(scored_items.begin(), scored_items.begin() + max_matches,
210 scored_items.end(), 211 scored_items.end(),
211 ScoredHistoryMatch::MatchScoreGreater); 212 ScoredHistoryMatch::MatchScoreGreater);
212 scored_items.resize(max_matches); 213 scored_items.resize(max_matches);
213 } else { 214 } else {
214 std::sort(scored_items.begin(), scored_items.end(), 215 std::sort(scored_items.begin(), scored_items.end(),
215 ScoredHistoryMatch::MatchScoreGreater); 216 ScoredHistoryMatch::MatchScoreGreater);
(...skipping 229 matching lines...) Expand 10 before | Expand all | Expand 10 after
445 // search_term_cache_ 446 // search_term_cache_
446 } 447 }
447 448
448 bool URLIndexPrivateData::Empty() const { 449 bool URLIndexPrivateData::Empty() const {
449 return history_info_map_.empty(); 450 return history_info_map_.empty();
450 } 451 }
451 452
452 void URLIndexPrivateData::Clear() { 453 void URLIndexPrivateData::Clear() {
453 last_time_rebuilt_from_history_ = base::Time(); 454 last_time_rebuilt_from_history_ = base::Time();
454 word_list_.clear(); 455 word_list_.clear();
455 available_words_.clear(); 456 available_words_ = {};
456 word_map_.clear(); 457 word_map_.clear();
457 char_word_map_.clear(); 458 char_word_map_.clear();
458 word_id_history_map_.clear(); 459 word_id_history_map_.clear();
459 history_id_word_map_.clear(); 460 history_id_word_map_.clear();
460 history_info_map_.clear(); 461 history_info_map_.clear();
461 word_starts_map_.clear(); 462 word_starts_map_.clear();
462 } 463 }
463 464
464 URLIndexPrivateData::~URLIndexPrivateData() {} 465 URLIndexPrivateData::~URLIndexPrivateData() {}
465 466
466 HistoryIDSet URLIndexPrivateData::HistoryIDSetFromWords( 467 HistoryIDVector URLIndexPrivateData::HistoryIDsFromWords(
467 const String16Vector& unsorted_words) { 468 const String16Vector& unsorted_words) {
469 // This histogram name reflects the historic name of this function.
468 SCOPED_UMA_HISTOGRAM_TIMER("Omnibox.HistoryQuickHistoryIDSetFromWords"); 470 SCOPED_UMA_HISTOGRAM_TIMER("Omnibox.HistoryQuickHistoryIDSetFromWords");
469 // Break the terms down into individual terms (words), get the candidate 471 // Break the terms down into individual terms (words), get the candidate
470 // set for each term, and intersect each to get a final candidate list. 472 // set for each term, and intersect each to get a final candidate list.
471 // Note that a single 'term' from the user's perspective might be 473 // Note that a single 'term' from the user's perspective might be
472 // a string like "http://www.somewebsite.com" which, from our perspective, 474 // a string like "http://www.somewebsite.com" which, from our perspective,
473 // is four words: 'http', 'www', 'somewebsite', and 'com'. 475 // is four words: 'http', 'www', 'somewebsite', and 'com'.
474 HistoryIDSet history_id_set; 476 HistoryIDVector history_ids;
475 String16Vector words(unsorted_words); 477 String16Vector words(unsorted_words);
476 // Sort the words into the longest first as such are likely to narrow down 478 // Sort the words into the longest first as such are likely to narrow down
477 // the results quicker. Also, single character words are the most expensive 479 // the results quicker. Also, single character words are the most expensive
478 // to process so save them for last. 480 // to process so save them for last.
479 std::sort(words.begin(), words.end(), LengthGreater); 481 std::sort(words.begin(), words.end(), LengthGreater);
482
483 // TODO(dyaroshev): write a generic algorithm(crbug.com/696167).
480 for (String16Vector::iterator iter = words.begin(); iter != words.end(); 484 for (String16Vector::iterator iter = words.begin(); iter != words.end();
481 ++iter) { 485 ++iter) {
482 base::string16 uni_word = *iter; 486 base::string16 uni_word = *iter;
Peter Kasting 2017/02/28 00:33:54 Nit: Inline into next statement.
483 HistoryIDSet term_history_set = HistoryIDsForTerm(uni_word); 487 HistoryIDSet term_history_set = HistoryIDsForTerm(uni_word);
484 if (term_history_set.empty()) { 488 if (term_history_set.empty()) {
485 history_id_set.clear(); 489 history_ids.clear();
Peter Kasting 2017/02/28 00:33:54 Nit: Just "return HistoryIDVector();" rather than
486 break; 490 break;
487 } 491 }
488 if (iter == words.begin()) { 492 if (iter == words.begin()) {
489 history_id_set.swap(term_history_set); 493 history_ids = {term_history_set.begin(), term_history_set.end()};
490 } else { 494 } else {
491 HistoryIDSet new_history_id_set = base::STLSetIntersection<HistoryIDSet>( 495 history_ids = base::STLSetIntersection<HistoryIDVector>(history_ids,
492 history_id_set, term_history_set); 496 term_history_set);
493 history_id_set.swap(new_history_id_set);
494 } 497 }
495 } 498 }
496 return history_id_set; 499 return history_ids;
497 } 500 }
498 501
499 bool URLIndexPrivateData::TrimHistoryIdsPool( 502 bool URLIndexPrivateData::TrimHistoryIdsPool(
500 HistoryIDSet* history_id_set) const { 503 HistoryIDVector* history_ids) const {
501 constexpr size_t kItemsToScoreLimit = 500; 504 constexpr size_t kItemsToScoreLimit = 500;
502 if (history_id_set->size() <= kItemsToScoreLimit) 505 if (history_ids->size() <= kItemsToScoreLimit)
503 return false; 506 return false;
504 507
505 HistoryIDVector history_ids(history_id_set->begin(), history_id_set->end());
506
507 // Trim down the set by sorting by typed-count, visit-count, and last 508 // Trim down the set by sorting by typed-count, visit-count, and last
508 // visit. 509 // visit.
509 auto new_end = history_ids.begin() + kItemsToScoreLimit; 510 auto new_end = history_ids->begin() + kItemsToScoreLimit;
510 HistoryItemFactorGreater item_factor_functor(history_info_map_); 511 HistoryItemFactorGreater item_factor_functor(history_info_map_);
511 512
512 std::nth_element(history_ids.begin(), new_end, history_ids.end(), 513 std::nth_element(history_ids->begin(), new_end, history_ids->end(),
513 item_factor_functor); 514 item_factor_functor);
514 history_ids.erase(new_end, history_ids.end()); 515 history_ids->erase(new_end, history_ids->end());
515 516
516 *history_id_set = {history_ids.begin(), history_ids.end()};
517 return true; 517 return true;
518 } 518 }
519 519
520 HistoryIDSet URLIndexPrivateData::HistoryIDsForTerm( 520 HistoryIDSet URLIndexPrivateData::HistoryIDsForTerm(
521 const base::string16& term) { 521 const base::string16& term) {
522 if (term.empty()) 522 if (term.empty())
523 return HistoryIDSet(); 523 return HistoryIDSet();
524 524
525 // TODO(mrossetti): Consider optimizing for very common terms such as 525 // TODO(mrossetti): Consider optimizing for very common terms such as
526 // 'http[s]', 'www', 'com', etc. Or collect the top 100 more frequently 526 // 'http[s]', 'www', 'com', etc. Or collect the top 100 more frequently
(...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after
574 574
575 // Reduce the word set with any leftover, unprocessed characters. 575 // Reduce the word set with any leftover, unprocessed characters.
576 if (!unique_chars.empty()) { 576 if (!unique_chars.empty()) {
577 WordIDSet leftover_set(WordIDSetForTermChars(unique_chars)); 577 WordIDSet leftover_set(WordIDSetForTermChars(unique_chars));
578 // We might come up empty on the leftovers. 578 // We might come up empty on the leftovers.
579 if (leftover_set.empty()) { 579 if (leftover_set.empty()) {
580 search_term_cache_[term] = SearchTermCacheItem(); 580 search_term_cache_[term] = SearchTermCacheItem();
581 return HistoryIDSet(); 581 return HistoryIDSet();
582 } 582 }
583 // Or there may not have been a prefix from which to start. 583 // Or there may not have been a prefix from which to start.
584 if (prefix_chars.empty()) { 584 word_id_set = prefix_chars.empty() ? std::move(leftover_set)
585 word_id_set.swap(leftover_set); 585 : base::STLSetIntersection<WordIDSet>(
586 } else { 586 word_id_set, leftover_set);
587 WordIDSet new_word_id_set = base::STLSetIntersection<WordIDSet>(
588 word_id_set, leftover_set);
589 word_id_set.swap(new_word_id_set);
590 }
591 } 587 }
592 588
593 // We must filter the word list because the resulting word set surely 589 // We must filter the word list because the resulting word set surely
594 // contains words which do not have the search term as a proper subset. 590 // contains words which do not have the search term as a proper subset.
595 for (WordIDSet::iterator word_set_iter = word_id_set.begin(); 591 for (WordIDSet::iterator word_set_iter = word_id_set.begin();
596 word_set_iter != word_id_set.end(); ) { 592 word_set_iter != word_id_set.end(); ) {
597 if (word_list_[*word_set_iter].find(term) == base::string16::npos) 593 if (word_list_[*word_set_iter].find(term) == base::string16::npos)
598 word_set_iter = word_id_set.erase(word_set_iter); 594 word_set_iter = word_id_set.erase(word_set_iter);
599 else 595 else
600 ++word_set_iter; 596 ++word_set_iter;
601 } 597 }
602 } else { 598 } else {
603 word_id_set = WordIDSetForTermChars(Char16SetFromString16(term)); 599 word_id_set = WordIDSetForTermChars(Char16SetFromString16(term));
604 } 600 }
605 601
606 // If any words resulted then we can compose a set of history IDs by unioning 602 // If any words resulted then we can compose a set of history IDs by unioning
607 // the sets from each word. 603 // the sets from each word.
608 HistoryIDSet history_id_set; 604 HistoryIDSet history_id_set;
609 if (!word_id_set.empty()) { 605 for (WordID word_id : word_id_set) {
610 for (WordIDSet::iterator word_id_iter = word_id_set.begin(); 606 WordIDHistoryMap::iterator word_iter = word_id_history_map_.find(word_id);
611 word_id_iter != word_id_set.end(); ++word_id_iter) { 607 if (word_iter != word_id_history_map_.end()) {
612 WordID word_id = *word_id_iter; 608 HistoryIDSet& word_history_id_set(word_iter->second);
613 WordIDHistoryMap::iterator word_iter = word_id_history_map_.find(word_id); 609 history_id_set.insert(word_history_id_set.begin(),
614 if (word_iter != word_id_history_map_.end()) { 610 word_history_id_set.end());
615 HistoryIDSet& word_history_id_set(word_iter->second);
616 history_id_set.insert(word_history_id_set.begin(),
617 word_history_id_set.end());
618 }
619 } 611 }
620 } 612 }
621 613
622 // Record a new cache entry for this word if the term is longer than 614 // Record a new cache entry for this word if the term is longer than
623 // a single character. 615 // a single character.
624 if (term_length > 1) 616 if (term_length > 1)
625 search_term_cache_[term] = SearchTermCacheItem(word_id_set, history_id_set); 617 search_term_cache_[term] = SearchTermCacheItem(word_id_set, history_id_set);
626 618
627 return history_id_set; 619 return history_id_set;
628 } 620 }
629 621
630 WordIDSet URLIndexPrivateData::WordIDSetForTermChars( 622 WordIDSet URLIndexPrivateData::WordIDSetForTermChars(
631 const Char16Set& term_chars) { 623 const Char16Set& term_chars) {
624 // TODO(dyaroshev): write a generic algorithm(crbug.com/696167).
625
632 WordIDSet word_id_set; 626 WordIDSet word_id_set;
633 for (Char16Set::const_iterator c_iter = term_chars.begin(); 627 for (Char16Set::const_iterator c_iter = term_chars.begin();
634 c_iter != term_chars.end(); ++c_iter) { 628 c_iter != term_chars.end(); ++c_iter) {
635 CharWordIDMap::iterator char_iter = char_word_map_.find(*c_iter); 629 CharWordIDMap::iterator char_iter = char_word_map_.find(*c_iter);
636 if (char_iter == char_word_map_.end()) { 630 if (char_iter == char_word_map_.end()) {
637 // A character was not found so there are no matching results: bail. 631 // A character was not found so there are no matching results: bail.
638 word_id_set.clear(); 632 word_id_set.clear();
Peter Kasting 2017/02/28 00:33:54 Nit: Same comment as above
639 break; 633 break;
640 } 634 }
641 WordIDSet& char_word_id_set(char_iter->second); 635 WordIDSet& char_word_id_set(char_iter->second);
Peter Kasting 2017/02/28 00:33:54 Nit: Can be const
642 // It is possible for there to no longer be any words associated with 636 // It is possible for there to no longer be any words associated with
643 // a particular character. Give up in that case. 637 // a particular character. Give up in that case.
644 if (char_word_id_set.empty()) { 638 if (char_word_id_set.empty()) {
645 word_id_set.clear(); 639 word_id_set.clear();
646 break; 640 break;
647 } 641 }
648 642
649 if (c_iter == term_chars.begin()) { 643 if (c_iter == term_chars.begin()) {
650 // First character results becomes base set of results. 644 // First character results becomes base set of results.
Peter Kasting 2017/02/28 00:33:55 Nit: These comments just restate the code, remove.
651 word_id_set = char_word_id_set; 645 word_id_set = char_word_id_set;
652 } else { 646 } else {
653 // Subsequent character results get intersected in. 647 // Subsequent character results get intersected in.
654 WordIDSet new_word_id_set = base::STLSetIntersection<WordIDSet>( 648 word_id_set =
655 word_id_set, char_word_id_set); 649 base::STLSetIntersection<WordIDSet>(word_id_set, char_word_id_set);
656 word_id_set.swap(new_word_id_set);
657 } 650 }
658 } 651 }
659 return word_id_set; 652 return word_id_set;
660 } 653 }
661 654
662 void URLIndexPrivateData::HistoryIdSetToScoredMatches( 655 void URLIndexPrivateData::HistoryIdsToScoredMatches(
663 HistoryIDSet history_id_set, 656 HistoryIDVector history_ids,
664 const base::string16& lower_raw_string, 657 const base::string16& lower_raw_string,
665 const TemplateURLService* template_url_service, 658 const TemplateURLService* template_url_service,
666 bookmarks::BookmarkModel* bookmark_model, 659 bookmarks::BookmarkModel* bookmark_model,
667 ScoredHistoryMatches* scored_items) const { 660 ScoredHistoryMatches* scored_items) const {
668 if (history_id_set.empty()) 661 if (history_ids.empty())
669 return; 662 return;
670 663
671 // Break up the raw search string (complete with escaped URL elements) into 664 // Break up the raw search string (complete with escaped URL elements) into
672 // 'terms' (as opposed to 'words'; see comment in HistoryItemsForTerms()). 665 // 'terms' (as opposed to 'words'; see comment in HistoryItemsForTerms()).
673 // We only want to break up the search string on 'true' whitespace rather than 666 // We only want to break up the search string on 'true' whitespace rather than
674 // escaped whitespace. For example, when the user types 667 // escaped whitespace. For example, when the user types
675 // "colspec=ID%20Mstone Release" we get two 'terms': "colspec=id%20mstone" and 668 // "colspec=ID%20Mstone Release" we get two 'terms': "colspec=id%20mstone" and
676 // "release". 669 // "release".
677 String16Vector lower_raw_terms = 670 String16Vector lower_raw_terms =
678 base::SplitString(lower_raw_string, base::kWhitespaceUTF16, 671 base::SplitString(lower_raw_string, base::kWhitespaceUTF16,
679 base::KEEP_WHITESPACE, base::SPLIT_WANT_NONEMPTY); 672 base::KEEP_WHITESPACE, base::SPLIT_WANT_NONEMPTY);
680 673
681 // Don't score matches when there are no terms to score against. (It's 674 // Don't score matches when there are no terms to score against. (It's
682 // possible that the word break iterater that extracts words to search for in 675 // possible that the word break iterater that extracts words to search for in
683 // the database allows some whitespace "words" whereas SplitString excludes a 676 // the database allows some whitespace "words" whereas SplitString excludes a
684 // long list of whitespace.) One could write a scoring function that gives a 677 // long list of whitespace.) One could write a scoring function that gives a
685 // reasonable order to matches when there are no terms (i.e., all the words 678 // reasonable order to matches when there are no terms (i.e., all the words
686 // are some form of whitespace), but this is such a rare edge case that it's 679 // are some form of whitespace), but this is such a rare edge case that it's
687 // not worth the time. 680 // not worth the time.
688 if (lower_raw_terms.empty()) 681 if (lower_raw_terms.empty())
689 return; 682 return;
690 683
691 WordStarts lower_terms_to_word_starts_offsets; 684 WordStarts lower_terms_to_word_starts_offsets;
692 CalculateWordStartsOffsets(lower_raw_terms, 685 CalculateWordStartsOffsets(lower_raw_terms,
693 &lower_terms_to_word_starts_offsets); 686 &lower_terms_to_word_starts_offsets);
694 687
695 // Filter bad matches and other matches we don't want to display. 688 // Filter bad matches and other matches we don't want to display.
696 for (auto it = history_id_set.begin();;) { 689 auto filter = [this, template_url_service](const HistoryID history_id) {
697 it = std::find_if(it, history_id_set.end(), 690 return ShouldFilter(history_id, template_url_service);
698 [this, template_url_service](const HistoryID history_id) { 691 };
699 return ShouldFilter(history_id, template_url_service); 692 history_ids.erase(
700 }); 693 std::remove_if(history_ids.begin(), history_ids.end(), filter),
701 if (it == history_id_set.end()) 694 history_ids.end());
702 break;
703 it = history_id_set.erase(it);
704 }
705 695
706 // Score the matches. 696 // Score the matches.
707 const size_t num_matches = history_id_set.size(); 697 const size_t num_matches = history_ids.size();
708 const base::Time now = base::Time::Now(); 698 const base::Time now = base::Time::Now();
709 std::transform(
710 history_id_set.begin(), history_id_set.end(),
711 std::back_inserter(*scored_items), [&](const HistoryID history_id) {
712 auto hist_pos = history_info_map_.find(history_id);
713 const history::URLRow& hist_item = hist_pos->second.url_row;
714 auto starts_pos = word_starts_map_.find(history_id);
715 DCHECK(starts_pos != word_starts_map_.end());
716 return ScoredHistoryMatch(
717 hist_item, hist_pos->second.visits, lower_raw_string,
718 lower_raw_terms, lower_terms_to_word_starts_offsets,
719 starts_pos->second,
720 bookmark_model && bookmark_model->IsBookmarked(hist_item.url()),
721 num_matches, now);
722 });
723 699
dyaroshev 2017/02/26 01:12:17 First transforming and then removing seems unneces
724 // Filter all matches that ended up scoring 0. (These are usually matches 700 for (HistoryID history_id : history_ids) {
725 // which didn't match the user's raw terms.) 701 auto hist_pos = history_info_map_.find(history_id);
726 scored_items->erase(std::remove_if(scored_items->begin(), scored_items->end(), 702 const history::URLRow& hist_item = hist_pos->second.url_row;
727 [](const ScoredHistoryMatch& match) { 703 auto starts_pos = word_starts_map_.find(history_id);
728 return match.raw_score == 0; 704 DCHECK(starts_pos != word_starts_map_.end());
729 }), 705 ScoredHistoryMatch new_scored_match(
730 scored_items->end()); 706 hist_item, hist_pos->second.visits, lower_raw_string, lower_raw_terms,
707 lower_terms_to_word_starts_offsets, starts_pos->second,
708 bookmark_model && bookmark_model->IsBookmarked(hist_item.url()),
709 num_matches, now);
710 // Filter new matches that ended up scoring 0. (These are usually matches
711 // which didn't match the user's raw terms.)
712 if (new_scored_match.raw_score != 0)
Peter Kasting 2017/02/28 00:33:54 Nit: > 0?
713 scored_items->push_back(std::move(new_scored_match));
714 }
731 } 715 }
732 716
733 // static 717 // static
734 void URLIndexPrivateData::CalculateWordStartsOffsets( 718 void URLIndexPrivateData::CalculateWordStartsOffsets(
735 const String16Vector& lower_terms, 719 const String16Vector& lower_terms,
736 WordStarts* lower_terms_to_word_starts_offsets) { 720 WordStarts* lower_terms_to_word_starts_offsets) {
737 // Calculate offsets for each term. For instance, the offset for 721 // Calculate offsets for each term. For instance, the offset for
738 // ".net" should be 1, indicating that the actual word-part of the term 722 // ".net" should be 1, indicating that the actual word-part of the term
739 // starts at offset 1. 723 // starts at offset 1.
740 lower_terms_to_word_starts_offsets->resize(lower_terms.size(), 0u); 724 lower_terms_to_word_starts_offsets->resize(lower_terms.size(), 0u);
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after
812 HistoryID history_id = static_cast<HistoryID>(row.id()); 796 HistoryID history_id = static_cast<HistoryID>(row.id());
813 // Split URL into individual, unique words then add in the title words. 797 // Split URL into individual, unique words then add in the title words.
814 const GURL& gurl(row.url()); 798 const GURL& gurl(row.url());
815 const base::string16& url = 799 const base::string16& url =
816 bookmarks::CleanUpUrlForMatching(gurl, nullptr); 800 bookmarks::CleanUpUrlForMatching(gurl, nullptr);
817 String16Set url_words = String16SetFromString16(url, 801 String16Set url_words = String16SetFromString16(url,
818 word_starts ? &word_starts->url_word_starts_ : nullptr); 802 word_starts ? &word_starts->url_word_starts_ : nullptr);
819 const base::string16& title = bookmarks::CleanUpTitleForMatching(row.title()); 803 const base::string16& title = bookmarks::CleanUpTitleForMatching(row.title());
820 String16Set title_words = String16SetFromString16(title, 804 String16Set title_words = String16SetFromString16(title,
821 word_starts ? &word_starts->title_word_starts_ : nullptr); 805 word_starts ? &word_starts->title_word_starts_ : nullptr);
822 String16Set words = base::STLSetUnion<String16Set>(url_words, title_words); 806 for (const auto& word :
823 for (String16Set::iterator word_iter = words.begin(); 807 base::STLSetUnion<String16Set>(url_words, title_words))
824 word_iter != words.end(); ++word_iter) 808 AddWordToIndex(word, history_id);
825 AddWordToIndex(*word_iter, history_id);
826 809
827 search_term_cache_.clear(); // Invalidate the term cache. 810 search_term_cache_.clear(); // Invalidate the term cache.
828 } 811 }
829 812
830 void URLIndexPrivateData::AddWordToIndex(const base::string16& term, 813 void URLIndexPrivateData::AddWordToIndex(const base::string16& term,
831 HistoryID history_id) { 814 HistoryID history_id) {
832 WordMap::iterator word_pos = word_map_.find(term); 815 WordMap::iterator word_pos;
833 if (word_pos != word_map_.end()) 816 bool is_new;
834 UpdateWordHistory(word_pos->second, history_id); 817 std::tie(word_pos, is_new) = word_map_.emplace(term, WordID());
835 else 818
836 AddWordHistory(term, history_id); 819 // Adding a new word (i.e. a word that is not already in the word index).
820 if (is_new) {
821 word_pos->second = AddNewWordToWordList(term);
822
823 // For each character in the newly added word add the word to the character
824 // index.
825 for (base::char16 uni_char : Char16SetFromString16(term))
826 char_word_map_[uni_char].insert(word_pos->second);
827 }
828
829 word_id_history_map_[word_pos->second].insert(history_id);
830 history_id_word_map_[history_id].insert(word_pos->second);
837 } 831 }
838 832
839 void URLIndexPrivateData::AddWordHistory(const base::string16& term, 833 WordID URLIndexPrivateData::AddNewWordToWordList(const base::string16& term) {
840 HistoryID history_id) {
841 WordID word_id = word_list_.size(); 834 WordID word_id = word_list_.size();
842 if (available_words_.empty()) { 835 if (available_words_.empty()) {
843 word_list_.push_back(term); 836 word_list_.push_back(term);
844 } else { 837 return word_id;
845 word_id = *(available_words_.begin());
846 word_list_[word_id] = term;
847 available_words_.erase(word_id);
848 } 838 }
849 word_map_[term] = word_id;
850 839
851 HistoryIDSet history_id_set; 840 word_id = available_words_.top();
852 history_id_set.insert(history_id); 841 available_words_.pop();
853 word_id_history_map_[word_id] = history_id_set; 842 word_list_[word_id] = term;
854 AddToHistoryIDWordMap(history_id, word_id); 843 return word_id;
855
856 // For each character in the newly added word (i.e. a word that is not
857 // already in the word index), add the word to the character index.
858 Char16Set characters = Char16SetFromString16(term);
859 for (Char16Set::iterator uni_char_iter = characters.begin();
860 uni_char_iter != characters.end(); ++uni_char_iter) {
861 base::char16 uni_char = *uni_char_iter;
862 CharWordIDMap::iterator char_iter = char_word_map_.find(uni_char);
863 if (char_iter != char_word_map_.end()) {
864 // Update existing entry in the char/word index.
865 WordIDSet& word_id_set(char_iter->second);
866 word_id_set.insert(word_id);
867 } else {
868 // Create a new entry in the char/word index.
869 WordIDSet word_id_set;
870 word_id_set.insert(word_id);
871 char_word_map_[uni_char] = word_id_set;
872 }
873 }
874 }
875
876 void URLIndexPrivateData::UpdateWordHistory(WordID word_id,
877 HistoryID history_id) {
878 WordIDHistoryMap::iterator history_pos = word_id_history_map_.find(word_id);
879 DCHECK(history_pos != word_id_history_map_.end());
880 HistoryIDSet& history_id_set(history_pos->second);
881 history_id_set.insert(history_id);
882 AddToHistoryIDWordMap(history_id, word_id);
883 }
884
885 void URLIndexPrivateData::AddToHistoryIDWordMap(HistoryID history_id,
886 WordID word_id) {
887 HistoryIDWordMap::iterator iter = history_id_word_map_.find(history_id);
888 if (iter != history_id_word_map_.end()) {
889 WordIDSet& word_id_set(iter->second);
890 word_id_set.insert(word_id);
891 } else {
892 WordIDSet word_id_set;
893 word_id_set.insert(word_id);
894 history_id_word_map_[history_id] = word_id_set;
895 }
896 } 844 }
897 845
898 void URLIndexPrivateData::RemoveRowFromIndex(const history::URLRow& row) { 846 void URLIndexPrivateData::RemoveRowFromIndex(const history::URLRow& row) {
899 RemoveRowWordsFromIndex(row); 847 RemoveRowWordsFromIndex(row);
900 HistoryID history_id = static_cast<HistoryID>(row.id()); 848 HistoryID history_id = static_cast<HistoryID>(row.id());
901 history_info_map_.erase(history_id); 849 history_info_map_.erase(history_id);
902 word_starts_map_.erase(history_id); 850 word_starts_map_.erase(history_id);
903 } 851 }
904 852
905 void URLIndexPrivateData::RemoveRowWordsFromIndex(const history::URLRow& row) { 853 void URLIndexPrivateData::RemoveRowWordsFromIndex(const history::URLRow& row) {
906 // Remove the entries in history_id_word_map_ and word_id_history_map_ for 854 // Remove the entries in history_id_word_map_ and word_id_history_map_ for
907 // this row. 855 // this row.
908 HistoryID history_id = static_cast<HistoryID>(row.id()); 856 HistoryID history_id = static_cast<HistoryID>(row.id());
909 WordIDSet word_id_set = history_id_word_map_[history_id]; 857 WordIDSet word_id_set = history_id_word_map_[history_id];
910 history_id_word_map_.erase(history_id); 858 history_id_word_map_.erase(history_id);
911 859
912 // Reconcile any changes to word usage. 860 // Reconcile any changes to word usage.
913 for (WordIDSet::iterator word_id_iter = word_id_set.begin(); 861 for (WordID word_id : word_id_set) {
914 word_id_iter != word_id_set.end(); ++word_id_iter) { 862 auto word_id_history_map_iter = word_id_history_map_.find(word_id);
915 WordID word_id = *word_id_iter; 863 DCHECK(word_id_history_map_iter != word_id_history_map_.end());
916 word_id_history_map_[word_id].erase(history_id); 864
917 if (!word_id_history_map_[word_id].empty()) 865 word_id_history_map_iter->second.erase(history_id);
918 continue; // The word is still in use. 866 if (!word_id_history_map_iter->second.empty())
867 continue;
919 868
920 // The word is no longer in use. Reconcile any changes to character usage. 869 // The word is no longer in use. Reconcile any changes to character usage.
921 base::string16 word = word_list_[word_id]; 870 base::string16 word = word_list_[word_id];
922 Char16Set characters = Char16SetFromString16(word); 871 for (base::char16 uni_char : Char16SetFromString16(word)) {
923 for (Char16Set::iterator uni_char_iter = characters.begin(); 872 auto char_word_map_iter = char_word_map_.find(uni_char);
924 uni_char_iter != characters.end(); ++uni_char_iter) { 873 char_word_map_iter->second.erase(word_id);
925 base::char16 uni_char = *uni_char_iter; 874 if (char_word_map_iter->second.empty())
926 char_word_map_[uni_char].erase(word_id); 875 char_word_map_.erase(char_word_map_iter);
927 if (char_word_map_[uni_char].empty())
928 char_word_map_.erase(uni_char); // No longer in use.
929 } 876 }
930 877
931 // Complete the removal of references to the word. 878 // Complete the removal of references to the word.
932 word_id_history_map_.erase(word_id); 879 word_id_history_map_.erase(word_id_history_map_iter);
933 word_map_.erase(word); 880 word_map_.erase(word);
934 word_list_[word_id] = base::string16(); 881 word_list_[word_id] = base::string16();
935 available_words_.insert(word_id); 882 available_words_.push(word_id);
936 } 883 }
937 } 884 }
938 885
939 void URLIndexPrivateData::ResetSearchTermCache() { 886 void URLIndexPrivateData::ResetSearchTermCache() {
940 for (SearchTermCacheMap::iterator iter = search_term_cache_.begin(); 887 for (auto& item : search_term_cache_)
941 iter != search_term_cache_.end(); ++iter) 888 item.second.used_ = false;
942 iter->second.used_ = false;
943 } 889 }
944 890
945 bool URLIndexPrivateData::SaveToFile(const base::FilePath& file_path) { 891 bool URLIndexPrivateData::SaveToFile(const base::FilePath& file_path) {
946 base::TimeTicks beginning_time = base::TimeTicks::Now(); 892 base::TimeTicks beginning_time = base::TimeTicks::Now();
947 InMemoryURLIndexCacheItem index_cache; 893 InMemoryURLIndexCacheItem index_cache;
948 SavePrivateData(&index_cache); 894 SavePrivateData(&index_cache);
949 std::string data; 895 std::string data;
950 if (!index_cache.SerializeToString(&data)) { 896 if (!index_cache.SerializeToString(&data)) {
951 LOG(WARNING) << "Failed to serialize the InMemoryURLIndex cache."; 897 LOG(WARNING) << "Failed to serialize the InMemoryURLIndex cache.";
952 return false; 898 return false;
(...skipping 24 matching lines...) Expand all
977 SaveWordIDHistoryMap(cache); 923 SaveWordIDHistoryMap(cache);
978 SaveHistoryInfoMap(cache); 924 SaveHistoryInfoMap(cache);
979 SaveWordStartsMap(cache); 925 SaveWordStartsMap(cache);
980 } 926 }
981 927
982 void URLIndexPrivateData::SaveWordList(InMemoryURLIndexCacheItem* cache) const { 928 void URLIndexPrivateData::SaveWordList(InMemoryURLIndexCacheItem* cache) const {
983 if (word_list_.empty()) 929 if (word_list_.empty())
984 return; 930 return;
985 WordListItem* list_item = cache->mutable_word_list(); 931 WordListItem* list_item = cache->mutable_word_list();
986 list_item->set_word_count(word_list_.size()); 932 list_item->set_word_count(word_list_.size());
987 for (String16Vector::const_iterator iter = word_list_.begin(); 933 for (const base::string16& word : word_list_)
988 iter != word_list_.end(); ++iter) 934 list_item->add_word(base::UTF16ToUTF8(word));
989 list_item->add_word(base::UTF16ToUTF8(*iter));
990 } 935 }
991 936
992 void URLIndexPrivateData::SaveWordMap(InMemoryURLIndexCacheItem* cache) const { 937 void URLIndexPrivateData::SaveWordMap(InMemoryURLIndexCacheItem* cache) const {
993 if (word_map_.empty()) 938 if (word_map_.empty())
994 return; 939 return;
995 WordMapItem* map_item = cache->mutable_word_map(); 940 WordMapItem* map_item = cache->mutable_word_map();
996 map_item->set_item_count(word_map_.size()); 941 map_item->set_item_count(word_map_.size());
997 for (WordMap::const_iterator iter = word_map_.begin(); 942 for (const auto& elem : word_map_) {
998 iter != word_map_.end(); ++iter) {
999 WordMapEntry* map_entry = map_item->add_word_map_entry(); 943 WordMapEntry* map_entry = map_item->add_word_map_entry();
1000 map_entry->set_word(base::UTF16ToUTF8(iter->first)); 944 map_entry->set_word(base::UTF16ToUTF8(elem.first));
1001 map_entry->set_word_id(iter->second); 945 map_entry->set_word_id(elem.second);
1002 } 946 }
1003 } 947 }
1004 948
1005 void URLIndexPrivateData::SaveCharWordMap( 949 void URLIndexPrivateData::SaveCharWordMap(
1006 InMemoryURLIndexCacheItem* cache) const { 950 InMemoryURLIndexCacheItem* cache) const {
1007 if (char_word_map_.empty()) 951 if (char_word_map_.empty())
1008 return; 952 return;
1009 CharWordMapItem* map_item = cache->mutable_char_word_map(); 953 CharWordMapItem* map_item = cache->mutable_char_word_map();
1010 map_item->set_item_count(char_word_map_.size()); 954 map_item->set_item_count(char_word_map_.size());
1011 for (CharWordIDMap::const_iterator iter = char_word_map_.begin(); 955 for (const auto& entry : char_word_map_) {
1012 iter != char_word_map_.end(); ++iter) {
1013 CharWordMapEntry* map_entry = map_item->add_char_word_map_entry(); 956 CharWordMapEntry* map_entry = map_item->add_char_word_map_entry();
1014 map_entry->set_char_16(iter->first); 957 map_entry->set_char_16(entry.first);
1015 const WordIDSet& word_id_set(iter->second); 958 const WordIDSet& word_id_set = entry.second;
1016 map_entry->set_item_count(word_id_set.size()); 959 map_entry->set_item_count(word_id_set.size());
1017 for (WordIDSet::const_iterator set_iter = word_id_set.begin(); 960 for (WordID word_id : word_id_set)
1018 set_iter != word_id_set.end(); ++set_iter) 961 map_entry->add_word_id(word_id);
1019 map_entry->add_word_id(*set_iter);
1020 } 962 }
1021 } 963 }
1022 964
1023 void URLIndexPrivateData::SaveWordIDHistoryMap( 965 void URLIndexPrivateData::SaveWordIDHistoryMap(
1024 InMemoryURLIndexCacheItem* cache) const { 966 InMemoryURLIndexCacheItem* cache) const {
1025 if (word_id_history_map_.empty()) 967 if (word_id_history_map_.empty())
1026 return; 968 return;
1027 WordIDHistoryMapItem* map_item = cache->mutable_word_id_history_map(); 969 WordIDHistoryMapItem* map_item = cache->mutable_word_id_history_map();
1028 map_item->set_item_count(word_id_history_map_.size()); 970 map_item->set_item_count(word_id_history_map_.size());
1029 for (WordIDHistoryMap::const_iterator iter = word_id_history_map_.begin(); 971 for (const auto& entry : word_id_history_map_) {
1030 iter != word_id_history_map_.end(); ++iter) {
1031 WordIDHistoryMapEntry* map_entry = 972 WordIDHistoryMapEntry* map_entry =
1032 map_item->add_word_id_history_map_entry(); 973 map_item->add_word_id_history_map_entry();
1033 map_entry->set_word_id(iter->first); 974 map_entry->set_word_id(entry.first);
1034 const HistoryIDSet& history_id_set(iter->second); 975 const HistoryIDSet& history_id_set = entry.second;
1035 map_entry->set_item_count(history_id_set.size()); 976 map_entry->set_item_count(history_id_set.size());
1036 for (HistoryIDSet::const_iterator set_iter = history_id_set.begin(); 977 for (HistoryID history_id : history_id_set)
1037 set_iter != history_id_set.end(); ++set_iter) 978 map_entry->add_history_id(history_id);
1038 map_entry->add_history_id(*set_iter);
1039 } 979 }
1040 } 980 }
1041 981
1042 void URLIndexPrivateData::SaveHistoryInfoMap( 982 void URLIndexPrivateData::SaveHistoryInfoMap(
1043 InMemoryURLIndexCacheItem* cache) const { 983 InMemoryURLIndexCacheItem* cache) const {
1044 if (history_info_map_.empty()) 984 if (history_info_map_.empty())
1045 return; 985 return;
1046 HistoryInfoMapItem* map_item = cache->mutable_history_info_map(); 986 HistoryInfoMapItem* map_item = cache->mutable_history_info_map();
1047 map_item->set_item_count(history_info_map_.size()); 987 map_item->set_item_count(history_info_map_.size());
1048 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin(); 988 for (const auto& entry : history_info_map_) {
1049 iter != history_info_map_.end(); ++iter) {
1050 HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry(); 989 HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry();
1051 map_entry->set_history_id(iter->first); 990 map_entry->set_history_id(entry.first);
1052 const history::URLRow& url_row(iter->second.url_row); 991 const history::URLRow& url_row = entry.second.url_row;
1053 // Note: We only save information that contributes to the index so there 992 // Note: We only save information that contributes to the index so there
1054 // is no need to save search_term_cache_ (not persistent). 993 // is no need to save search_term_cache_ (not persistent).
1055 map_entry->set_visit_count(url_row.visit_count()); 994 map_entry->set_visit_count(url_row.visit_count());
1056 map_entry->set_typed_count(url_row.typed_count()); 995 map_entry->set_typed_count(url_row.typed_count());
1057 map_entry->set_last_visit(url_row.last_visit().ToInternalValue()); 996 map_entry->set_last_visit(url_row.last_visit().ToInternalValue());
1058 map_entry->set_url(url_row.url().spec()); 997 map_entry->set_url(url_row.url().spec());
1059 map_entry->set_title(base::UTF16ToUTF8(url_row.title())); 998 map_entry->set_title(base::UTF16ToUTF8(url_row.title()));
1060 const VisitInfoVector& visits(iter->second.visits); 999 for (const auto& visit : entry.second.visits) {
1061 for (VisitInfoVector::const_iterator visit_iter = visits.begin();
1062 visit_iter != visits.end(); ++visit_iter) {
1063 HistoryInfoMapEntry_VisitInfo* visit_info = map_entry->add_visits(); 1000 HistoryInfoMapEntry_VisitInfo* visit_info = map_entry->add_visits();
1064 visit_info->set_visit_time(visit_iter->first.ToInternalValue()); 1001 visit_info->set_visit_time(visit.first.ToInternalValue());
1065 visit_info->set_transition_type(visit_iter->second); 1002 visit_info->set_transition_type(visit.second);
1066 } 1003 }
1067 } 1004 }
1068 } 1005 }
1069 1006
1070 void URLIndexPrivateData::SaveWordStartsMap( 1007 void URLIndexPrivateData::SaveWordStartsMap(
1071 InMemoryURLIndexCacheItem* cache) const { 1008 InMemoryURLIndexCacheItem* cache) const {
1072 if (word_starts_map_.empty()) 1009 if (word_starts_map_.empty())
1073 return; 1010 return;
1074 // For unit testing: Enable saving of the cache as an earlier version to 1011 // For unit testing: Enable saving of the cache as an earlier version to
1075 // allow testing of cache file upgrading in ReadFromFile(). 1012 // allow testing of cache file upgrading in ReadFromFile().
1076 // TODO(mrossetti): Instead of intruding on production code with this kind of 1013 // TODO(mrossetti): Instead of intruding on production code with this kind of
1077 // test harness, save a copy of an older version cache with known results. 1014 // test harness, save a copy of an older version cache with known results.
1078 // Implement this when switching the caching over to SQLite. 1015 // Implement this when switching the caching over to SQLite.
1079 if (saved_cache_version_ < 1) 1016 if (saved_cache_version_ < 1)
1080 return; 1017 return;
1081 1018
1082 WordStartsMapItem* map_item = cache->mutable_word_starts_map(); 1019 WordStartsMapItem* map_item = cache->mutable_word_starts_map();
1083 map_item->set_item_count(word_starts_map_.size()); 1020 map_item->set_item_count(word_starts_map_.size());
1084 for (WordStartsMap::const_iterator iter = word_starts_map_.begin(); 1021 for (const auto& entry : word_starts_map_) {
1085 iter != word_starts_map_.end(); ++iter) {
1086 WordStartsMapEntry* map_entry = map_item->add_word_starts_map_entry(); 1022 WordStartsMapEntry* map_entry = map_item->add_word_starts_map_entry();
1087 map_entry->set_history_id(iter->first); 1023 map_entry->set_history_id(entry.first);
1088 const RowWordStarts& word_starts(iter->second); 1024 const RowWordStarts& word_starts = entry.second;
1089 for (WordStarts::const_iterator i = word_starts.url_word_starts_.begin(); 1025 for (auto url_word_start : word_starts.url_word_starts_)
1090 i != word_starts.url_word_starts_.end(); ++i) 1026 map_entry->add_url_word_starts(url_word_start);
1091 map_entry->add_url_word_starts(*i); 1027 for (auto title_word_start : word_starts.title_word_starts_)
1092 for (WordStarts::const_iterator i = word_starts.title_word_starts_.begin(); 1028 map_entry->add_title_word_starts(title_word_start);
1093 i != word_starts.title_word_starts_.end(); ++i)
1094 map_entry->add_title_word_starts(*i);
1095 } 1029 }
1096 } 1030 }
1097 1031
1098 bool URLIndexPrivateData::RestorePrivateData( 1032 bool URLIndexPrivateData::RestorePrivateData(
1099 const InMemoryURLIndexCacheItem& cache) { 1033 const InMemoryURLIndexCacheItem& cache) {
1100 last_time_rebuilt_from_history_ = 1034 last_time_rebuilt_from_history_ =
1101 base::Time::FromInternalValue(cache.last_rebuild_timestamp()); 1035 base::Time::FromInternalValue(cache.last_rebuild_timestamp());
1102 const base::TimeDelta rebuilt_ago = 1036 const base::TimeDelta rebuilt_ago =
1103 base::Time::Now() - last_time_rebuilt_from_history_; 1037 base::Time::Now() - last_time_rebuilt_from_history_;
1104 if ((rebuilt_ago > base::TimeDelta::FromDays(7)) || 1038 if ((rebuilt_ago > base::TimeDelta::FromDays(7)) ||
(...skipping 21 matching lines...) Expand all
1126 1060
1127 bool URLIndexPrivateData::RestoreWordList( 1061 bool URLIndexPrivateData::RestoreWordList(
1128 const InMemoryURLIndexCacheItem& cache) { 1062 const InMemoryURLIndexCacheItem& cache) {
1129 if (!cache.has_word_list()) 1063 if (!cache.has_word_list())
1130 return false; 1064 return false;
1131 const WordListItem& list_item(cache.word_list()); 1065 const WordListItem& list_item(cache.word_list());
1132 uint32_t expected_item_count = list_item.word_count(); 1066 uint32_t expected_item_count = list_item.word_count();
1133 uint32_t actual_item_count = list_item.word_size(); 1067 uint32_t actual_item_count = list_item.word_size();
1134 if (actual_item_count == 0 || actual_item_count != expected_item_count) 1068 if (actual_item_count == 0 || actual_item_count != expected_item_count)
1135 return false; 1069 return false;
1136 const RepeatedPtrField<std::string>& words(list_item.word()); 1070 const RepeatedPtrField<std::string>& words = list_item.word();
1137 for (RepeatedPtrField<std::string>::const_iterator iter = words.begin(); 1071 word_list_.reserve(words.size());
1138 iter != words.end(); ++iter) 1072 std::transform(
1139 word_list_.push_back(base::UTF8ToUTF16(*iter)); 1073 words.begin(), words.end(), std::back_inserter(word_list_),
1074 [](const std::string& word) { return base::UTF8ToUTF16(word); });
1140 return true; 1075 return true;
1141 } 1076 }
1142 1077
1143 bool URLIndexPrivateData::RestoreWordMap( 1078 bool URLIndexPrivateData::RestoreWordMap(
1144 const InMemoryURLIndexCacheItem& cache) { 1079 const InMemoryURLIndexCacheItem& cache) {
1145 if (!cache.has_word_map()) 1080 if (!cache.has_word_map())
1146 return false; 1081 return false;
1147 const WordMapItem& list_item(cache.word_map()); 1082 const WordMapItem& list_item = cache.word_map();
1148 uint32_t expected_item_count = list_item.item_count(); 1083 uint32_t expected_item_count = list_item.item_count();
1149 uint32_t actual_item_count = list_item.word_map_entry_size(); 1084 uint32_t actual_item_count = list_item.word_map_entry_size();
1150 if (actual_item_count == 0 || actual_item_count != expected_item_count) 1085 if (actual_item_count == 0 || actual_item_count != expected_item_count)
1151 return false; 1086 return false;
1152 const RepeatedPtrField<WordMapEntry>& entries(list_item.word_map_entry()); 1087 for (const auto& entry : list_item.word_map_entry())
1153 for (RepeatedPtrField<WordMapEntry>::const_iterator iter = entries.begin(); 1088 word_map_[base::UTF8ToUTF16(entry.word())] = entry.word_id();
1154 iter != entries.end(); ++iter) 1089
1155 word_map_[base::UTF8ToUTF16(iter->word())] = iter->word_id();
1156 return true; 1090 return true;
1157 } 1091 }
1158 1092
1159 bool URLIndexPrivateData::RestoreCharWordMap( 1093 bool URLIndexPrivateData::RestoreCharWordMap(
1160 const InMemoryURLIndexCacheItem& cache) { 1094 const InMemoryURLIndexCacheItem& cache) {
1161 if (!cache.has_char_word_map()) 1095 if (!cache.has_char_word_map())
1162 return false; 1096 return false;
1163 const CharWordMapItem& list_item(cache.char_word_map()); 1097 const CharWordMapItem& list_item(cache.char_word_map());
1164 uint32_t expected_item_count = list_item.item_count(); 1098 uint32_t expected_item_count = list_item.item_count();
1165 uint32_t actual_item_count = list_item.char_word_map_entry_size(); 1099 uint32_t actual_item_count = list_item.char_word_map_entry_size();
1166 if (actual_item_count == 0 || actual_item_count != expected_item_count) 1100 if (actual_item_count == 0 || actual_item_count != expected_item_count)
1167 return false; 1101 return false;
1168 const RepeatedPtrField<CharWordMapEntry>& 1102
1169 entries(list_item.char_word_map_entry()); 1103 for (const auto& entry : list_item.char_word_map_entry()) {
1170 for (RepeatedPtrField<CharWordMapEntry>::const_iterator iter = 1104 expected_item_count = entry.item_count();
1171 entries.begin(); iter != entries.end(); ++iter) { 1105 actual_item_count = entry.word_id_size();
1172 expected_item_count = iter->item_count();
1173 actual_item_count = iter->word_id_size();
1174 if (actual_item_count == 0 || actual_item_count != expected_item_count) 1106 if (actual_item_count == 0 || actual_item_count != expected_item_count)
1175 return false; 1107 return false;
1176 base::char16 uni_char = static_cast<base::char16>(iter->char_16()); 1108 base::char16 uni_char = static_cast<base::char16>(entry.char_16());
1177 WordIDSet word_id_set; 1109 const RepeatedField<int32_t>& word_ids = entry.word_id();
1178 const RepeatedField<int32_t>& word_ids(iter->word_id()); 1110 char_word_map_[uni_char] = {word_ids.begin(), word_ids.end()};
1179 for (RepeatedField<int32_t>::const_iterator jiter = word_ids.begin();
1180 jiter != word_ids.end(); ++jiter)
1181 word_id_set.insert(*jiter);
1182 char_word_map_[uni_char] = word_id_set;
1183 } 1111 }
1184 return true; 1112 return true;
1185 } 1113 }
1186 1114
1187 bool URLIndexPrivateData::RestoreWordIDHistoryMap( 1115 bool URLIndexPrivateData::RestoreWordIDHistoryMap(
1188 const InMemoryURLIndexCacheItem& cache) { 1116 const InMemoryURLIndexCacheItem& cache) {
1189 if (!cache.has_word_id_history_map()) 1117 if (!cache.has_word_id_history_map())
1190 return false; 1118 return false;
1191 const WordIDHistoryMapItem& list_item(cache.word_id_history_map()); 1119 const WordIDHistoryMapItem& list_item(cache.word_id_history_map());
1192 uint32_t expected_item_count = list_item.item_count(); 1120 uint32_t expected_item_count = list_item.item_count();
1193 uint32_t actual_item_count = list_item.word_id_history_map_entry_size(); 1121 uint32_t actual_item_count = list_item.word_id_history_map_entry_size();
1194 if (actual_item_count == 0 || actual_item_count != expected_item_count) 1122 if (actual_item_count == 0 || actual_item_count != expected_item_count)
1195 return false; 1123 return false;
1196 const RepeatedPtrField<WordIDHistoryMapEntry>& 1124 for (const auto& entry : list_item.word_id_history_map_entry()) {
1197 entries(list_item.word_id_history_map_entry()); 1125 expected_item_count = entry.item_count();
1198 for (RepeatedPtrField<WordIDHistoryMapEntry>::const_iterator iter = 1126 actual_item_count = entry.history_id_size();
1199 entries.begin(); iter != entries.end(); ++iter) {
1200 expected_item_count = iter->item_count();
1201 actual_item_count = iter->history_id_size();
1202 if (actual_item_count == 0 || actual_item_count != expected_item_count) 1127 if (actual_item_count == 0 || actual_item_count != expected_item_count)
1203 return false; 1128 return false;
1204 WordID word_id = iter->word_id(); 1129 WordID word_id = entry.word_id();
1205 HistoryIDSet history_id_set; 1130 const RepeatedField<int64_t>& history_ids = entry.history_id();
1206 const RepeatedField<int64_t>& history_ids(iter->history_id()); 1131 word_id_history_map_[word_id] = {history_ids.begin(), history_ids.end()};
1207 for (RepeatedField<int64_t>::const_iterator jiter = history_ids.begin(); 1132 for (HistoryID history_id : history_ids)
1208 jiter != history_ids.end(); ++jiter) { 1133 history_id_word_map_[history_id].insert(word_id);
1209 history_id_set.insert(*jiter);
1210 AddToHistoryIDWordMap(*jiter, word_id);
1211 }
1212 word_id_history_map_[word_id] = history_id_set;
1213 } 1134 }
1214 return true; 1135 return true;
1215 } 1136 }
1216 1137
1217 bool URLIndexPrivateData::RestoreHistoryInfoMap( 1138 bool URLIndexPrivateData::RestoreHistoryInfoMap(
1218 const InMemoryURLIndexCacheItem& cache) { 1139 const InMemoryURLIndexCacheItem& cache) {
1219 if (!cache.has_history_info_map()) 1140 if (!cache.has_history_info_map())
1220 return false; 1141 return false;
1221 const HistoryInfoMapItem& list_item(cache.history_info_map()); 1142 const HistoryInfoMapItem& list_item(cache.history_info_map());
1222 uint32_t expected_item_count = list_item.item_count(); 1143 uint32_t expected_item_count = list_item.item_count();
1223 uint32_t actual_item_count = list_item.history_info_map_entry_size(); 1144 uint32_t actual_item_count = list_item.history_info_map_entry_size();
1224 if (actual_item_count == 0 || actual_item_count != expected_item_count) 1145 if (actual_item_count == 0 || actual_item_count != expected_item_count)
1225 return false; 1146 return false;
1226 const RepeatedPtrField<HistoryInfoMapEntry>& 1147
1227 entries(list_item.history_info_map_entry()); 1148 for (const auto& entry : list_item.history_info_map_entry()) {
1228 for (RepeatedPtrField<HistoryInfoMapEntry>::const_iterator iter = 1149 HistoryID history_id = entry.history_id();
1229 entries.begin(); iter != entries.end(); ++iter) { 1150 history::URLRow url_row(GURL(entry.url()), history_id);
1230 HistoryID history_id = iter->history_id(); 1151 url_row.set_visit_count(entry.visit_count());
1231 GURL url(iter->url()); 1152 url_row.set_typed_count(entry.typed_count());
1232 history::URLRow url_row(url, history_id); 1153 url_row.set_last_visit(base::Time::FromInternalValue(entry.last_visit()));
1233 url_row.set_visit_count(iter->visit_count()); 1154 if (entry.has_title())
1234 url_row.set_typed_count(iter->typed_count()); 1155 url_row.set_title(base::UTF8ToUTF16(entry.title()));
1235 url_row.set_last_visit(base::Time::FromInternalValue(iter->last_visit())); 1156 history_info_map_[history_id].url_row = std::move(url_row);
1236 if (iter->has_title()) {
1237 base::string16 title(base::UTF8ToUTF16(iter->title()));
1238 url_row.set_title(title);
1239 }
1240 history_info_map_[history_id].url_row = url_row;
1241 1157
1242 // Restore visits list. 1158 // Restore visits list.
1243 VisitInfoVector visits; 1159 VisitInfoVector visits;
1244 visits.reserve(iter->visits_size()); 1160 visits.reserve(entry.visits_size());
1245 for (int i = 0; i < iter->visits_size(); ++i) { 1161 for (const auto& entry_visit : entry.visits()) {
1246 visits.push_back(std::make_pair( 1162 visits.emplace_back(
1247 base::Time::FromInternalValue(iter->visits(i).visit_time()), 1163 base::Time::FromInternalValue(entry_visit.visit_time()),
1248 ui::PageTransitionFromInt(iter->visits(i).transition_type()))); 1164 ui::PageTransitionFromInt(entry_visit.transition_type()));
1249 } 1165 }
1250 history_info_map_[history_id].visits = visits; 1166 history_info_map_[history_id].visits = std::move(visits);
1251 } 1167 }
1252 return true; 1168 return true;
1253 } 1169 }
1254 1170
1255 bool URLIndexPrivateData::RestoreWordStartsMap( 1171 bool URLIndexPrivateData::RestoreWordStartsMap(
1256 const InMemoryURLIndexCacheItem& cache) { 1172 const InMemoryURLIndexCacheItem& cache) {
1257 // Note that this function must be called after RestoreHistoryInfoMap() has 1173 // Note that this function must be called after RestoreHistoryInfoMap() has
1258 // been run as the word starts may have to be recalculated from the urls and 1174 // been run as the word starts may have to be recalculated from the urls and
1259 // page titles. 1175 // page titles.
1260 if (cache.has_word_starts_map()) { 1176 if (cache.has_word_starts_map()) {
1261 const WordStartsMapItem& list_item(cache.word_starts_map()); 1177 const WordStartsMapItem& list_item(cache.word_starts_map());
1262 uint32_t expected_item_count = list_item.item_count(); 1178 uint32_t expected_item_count = list_item.item_count();
1263 uint32_t actual_item_count = list_item.word_starts_map_entry_size(); 1179 uint32_t actual_item_count = list_item.word_starts_map_entry_size();
1264 if (actual_item_count == 0 || actual_item_count != expected_item_count) 1180 if (actual_item_count == 0 || actual_item_count != expected_item_count)
1265 return false; 1181 return false;
1266 const RepeatedPtrField<WordStartsMapEntry>& 1182 for (const auto& entry : list_item.word_starts_map_entry()) {
1267 entries(list_item.word_starts_map_entry()); 1183 HistoryID history_id = entry.history_id();
1268 for (RepeatedPtrField<WordStartsMapEntry>::const_iterator iter =
1269 entries.begin(); iter != entries.end(); ++iter) {
1270 HistoryID history_id = iter->history_id();
1271 RowWordStarts word_starts; 1184 RowWordStarts word_starts;
1272 // Restore the URL word starts. 1185 // Restore the URL word starts.
1273 const RepeatedField<int32_t>& url_starts(iter->url_word_starts()); 1186 const RepeatedField<int32_t>& url_starts = entry.url_word_starts();
1274 for (RepeatedField<int32_t>::const_iterator jiter = url_starts.begin(); 1187 word_starts.url_word_starts_ = {url_starts.begin(), url_starts.end()};
1275 jiter != url_starts.end(); ++jiter) 1188
1276 word_starts.url_word_starts_.push_back(*jiter);
1277 // Restore the page title word starts. 1189 // Restore the page title word starts.
1278 const RepeatedField<int32_t>& title_starts(iter->title_word_starts()); 1190 const RepeatedField<int32_t>& title_starts = entry.title_word_starts();
1279 for (RepeatedField<int32_t>::const_iterator jiter = title_starts.begin(); 1191 word_starts.title_word_starts_ = {title_starts.begin(),
1280 jiter != title_starts.end(); ++jiter) 1192 title_starts.end()};
1281 word_starts.title_word_starts_.push_back(*jiter); 1193
1282 word_starts_map_[history_id] = word_starts; 1194 word_starts_map_[history_id] = std::move(word_starts);
1283 } 1195 }
1284 } else { 1196 } else {
1285 // Since the cache did not contain any word starts we must rebuild then from 1197 // Since the cache did not contain any word starts we must rebuild then from
1286 // the URL and page titles. 1198 // the URL and page titles.
1287 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin(); 1199 for (const auto& entry : history_info_map_) {
1288 iter != history_info_map_.end(); ++iter) {
1289 RowWordStarts word_starts; 1200 RowWordStarts word_starts;
1290 const history::URLRow& row(iter->second.url_row); 1201 const history::URLRow& row = entry.second.url_row;
1291 const base::string16& url = 1202 const base::string16& url =
1292 bookmarks::CleanUpUrlForMatching(row.url(), nullptr); 1203 bookmarks::CleanUpUrlForMatching(row.url(), nullptr);
1293 String16VectorFromString16(url, false, &word_starts.url_word_starts_); 1204 String16VectorFromString16(url, false, &word_starts.url_word_starts_);
1294 const base::string16& title = 1205 const base::string16& title =
1295 bookmarks::CleanUpTitleForMatching(row.title()); 1206 bookmarks::CleanUpTitleForMatching(row.title());
1296 String16VectorFromString16(title, false, &word_starts.title_word_starts_); 1207 String16VectorFromString16(title, false, &word_starts.title_word_starts_);
1297 word_starts_map_[iter->first] = word_starts; 1208 word_starts_map_[entry.first] = std::move(word_starts);
1298 } 1209 }
1299 } 1210 }
1300 return true; 1211 return true;
1301 } 1212 }
1302 1213
1303 // static 1214 // static
1304 bool URLIndexPrivateData::URLSchemeIsWhitelisted( 1215 bool URLIndexPrivateData::URLSchemeIsWhitelisted(
1305 const GURL& gurl, 1216 const GURL& gurl,
1306 const std::set<std::string>& whitelist) { 1217 const std::set<std::string>& whitelist) {
1307 return whitelist.find(gurl.scheme()) != whitelist.end(); 1218 return whitelist.find(gurl.scheme()) != whitelist.end();
(...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after
1370 // First cut: typed count, visit count, recency. 1281 // First cut: typed count, visit count, recency.
1371 // TODO(mrossetti): This is too simplistic. Consider an approach which ranks 1282 // TODO(mrossetti): This is too simplistic. Consider an approach which ranks
1372 // recently visited (within the last 12/24 hours) as highly important. Get 1283 // recently visited (within the last 12/24 hours) as highly important. Get
1373 // input from mpearson. 1284 // input from mpearson.
1374 if (r1.typed_count() != r2.typed_count()) 1285 if (r1.typed_count() != r2.typed_count())
1375 return (r1.typed_count() > r2.typed_count()); 1286 return (r1.typed_count() > r2.typed_count());
1376 if (r1.visit_count() != r2.visit_count()) 1287 if (r1.visit_count() != r2.visit_count())
1377 return (r1.visit_count() > r2.visit_count()); 1288 return (r1.visit_count() > r2.visit_count());
1378 return (r1.last_visit() > r2.last_visit()); 1289 return (r1.last_visit() > r2.last_visit());
1379 } 1290 }
OLDNEW
« no previous file with comments | « components/omnibox/browser/url_index_private_data.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698