OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "components/omnibox/browser/url_index_private_data.h" | 5 #include "components/omnibox/browser/url_index_private_data.h" |
6 | 6 |
7 #include <stdint.h> | 7 #include <stdint.h> |
8 | 8 |
9 #include <functional> | 9 #include <functional> |
10 #include <iterator> | 10 #include <iterator> |
(...skipping 19 matching lines...) | |
30 #include "components/omnibox/browser/in_memory_url_index.h" | 30 #include "components/omnibox/browser/in_memory_url_index.h" |
31 #include "components/search_engines/template_url_service.h" | 31 #include "components/search_engines/template_url_service.h" |
32 #include "components/url_formatter/url_formatter.h" | 32 #include "components/url_formatter/url_formatter.h" |
33 | 33 |
34 #if defined(USE_SYSTEM_PROTOBUF) | 34 #if defined(USE_SYSTEM_PROTOBUF) |
35 #include <google/protobuf/repeated_field.h> | 35 #include <google/protobuf/repeated_field.h> |
36 #else | 36 #else |
37 #include "third_party/protobuf/src/google/protobuf/repeated_field.h" | 37 #include "third_party/protobuf/src/google/protobuf/repeated_field.h" |
38 #endif | 38 #endif |
39 | 39 |
40 namespace { | |
41 | |
40 using google::protobuf::RepeatedField; | 42 using google::protobuf::RepeatedField; |
41 using google::protobuf::RepeatedPtrField; | 43 using google::protobuf::RepeatedPtrField; |
42 using in_memory_url_index::InMemoryURLIndexCacheItem; | 44 using in_memory_url_index::InMemoryURLIndexCacheItem; |
43 | 45 |
44 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordListItem | 46 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordListItem |
45 WordListItem; | 47 WordListItem; |
46 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordMapItem_WordMapEntry | 48 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordMapItem_WordMapEntry |
47 WordMapEntry; | 49 WordMapEntry; |
48 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordMapItem WordMapItem; | 50 typedef in_memory_url_index::InMemoryURLIndexCacheItem_WordMapItem WordMapItem; |
49 typedef in_memory_url_index::InMemoryURLIndexCacheItem_CharWordMapItem | 51 typedef in_memory_url_index::InMemoryURLIndexCacheItem_CharWordMapItem |
(...skipping 20 matching lines...) | |
70 WordStartsMapEntry; | 72 WordStartsMapEntry; |
71 | 73 |
72 // Algorithm Functions --------------------------------------------------------- | 74 // Algorithm Functions --------------------------------------------------------- |
73 | 75 |
74 // Comparison function for sorting search terms by descending length. | 76 // Comparison function for sorting search terms by descending length. |
75 bool LengthGreater(const base::string16& string_a, | 77 bool LengthGreater(const base::string16& string_a, |
76 const base::string16& string_b) { | 78 const base::string16& string_b) { |
77 return string_a.length() > string_b.length(); | 79 return string_a.length() > string_b.length(); |
78 } | 80 } |
79 | 81 |
82 } // namespace | |
80 | 83 |
81 // UpdateRecentVisitsFromHistoryDBTask ----------------------------------------- | 84 // UpdateRecentVisitsFromHistoryDBTask ----------------------------------------- |
82 | 85 |
83 // HistoryDBTask used to update the recent visit data for a particular | 86 // HistoryDBTask used to update the recent visit data for a particular |
84 // row from the history database. | 87 // row from the history database. |
85 class UpdateRecentVisitsFromHistoryDBTask : public history::HistoryDBTask { | 88 class UpdateRecentVisitsFromHistoryDBTask : public history::HistoryDBTask { |
86 public: | 89 public: |
87 explicit UpdateRecentVisitsFromHistoryDBTask( | 90 explicit UpdateRecentVisitsFromHistoryDBTask( |
88 URLIndexPrivateData* private_data, | 91 URLIndexPrivateData* private_data, |
89 history::URLID url_id); | 92 history::URLID url_id); |
(...skipping 90 matching lines...) | |
180 // the index we need individual, lower-cased words, ignoring escapings. For | 183 // the index we need individual, lower-cased words, ignoring escapings. For |
181 // the final filtering we need whitespace separated substrings possibly | 184 // the final filtering we need whitespace separated substrings possibly |
182 // containing escaped characters. | 185 // containing escaped characters. |
183 base::string16 lower_raw_string(base::i18n::ToLower(search_string)); | 186 base::string16 lower_raw_string(base::i18n::ToLower(search_string)); |
184 base::string16 lower_unescaped_string = net::UnescapeURLComponent( | 187 base::string16 lower_unescaped_string = net::UnescapeURLComponent( |
185 lower_raw_string, | 188 lower_raw_string, |
186 net::UnescapeRule::SPACES | net::UnescapeRule::PATH_SEPARATORS | | 189 net::UnescapeRule::SPACES | net::UnescapeRule::PATH_SEPARATORS | |
187 net::UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS); | 190 net::UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS); |
188 | 191 |
189 // Extract individual 'words' (as opposed to 'terms'; see comment in | 192 // Extract individual 'words' (as opposed to 'terms'; see comment in |
190 // HistoryIdSetToScoredMatches()) from the search string. When the user | 193 // HistoryIdsToScoredMatches()) from the search string. When the user types |
191 // types "colspec=ID%20Mstone Release" we get four 'words': "colspec", "id", | 194 // "colspec=ID%20Mstone Release" we get four 'words': "colspec", "id", |
192 // "mstone" and "release". | 195 // "mstone" and "release". |
193 String16Vector lower_words( | 196 String16Vector lower_words( |
194 String16VectorFromString16(lower_unescaped_string, false, nullptr)); | 197 String16VectorFromString16(lower_unescaped_string, false, nullptr)); |
195 if (lower_words.empty()) | 198 if (lower_words.empty()) |
196 continue; | 199 continue; |
197 HistoryIDSet history_id_set = HistoryIDSetFromWords(lower_words); | |
198 history_ids_were_trimmed |= TrimHistoryIdsPool(&history_id_set); | |
199 | 200 |
200 ScoredHistoryMatches temp_scored_items; | 201 HistoryIDVector history_ids = HistoryIDsFromWords(lower_words); |
201 HistoryIdSetToScoredMatches(history_id_set, lower_raw_string, | 202 history_ids_were_trimmed |= TrimHistoryIdsPool(&history_ids); |
202 template_url_service, bookmark_model, | 203 |
203 &temp_scored_items); | 204 HistoryIdsToScoredMatches(std::move(history_ids), lower_raw_string, |
204 scored_items.insert(scored_items.end(), temp_scored_items.begin(), | 205 template_url_service, bookmark_model, |
205 temp_scored_items.end()); | 206 &scored_items); |
dyaroshev
2017/02/26 01:12:17
In fixed version of HistoryIdsToScoredMatches() we
| |
206 } | 207 } |
207 // Select and sort only the top |max_matches| results. | 208 // Select and sort only the top |max_matches| results. |
208 if (scored_items.size() > max_matches) { | 209 if (scored_items.size() > max_matches) { |
209 std::partial_sort(scored_items.begin(), scored_items.begin() + max_matches, | 210 std::partial_sort(scored_items.begin(), scored_items.begin() + max_matches, |
210 scored_items.end(), | 211 scored_items.end(), |
211 ScoredHistoryMatch::MatchScoreGreater); | 212 ScoredHistoryMatch::MatchScoreGreater); |
212 scored_items.resize(max_matches); | 213 scored_items.resize(max_matches); |
213 } else { | 214 } else { |
214 std::sort(scored_items.begin(), scored_items.end(), | 215 std::sort(scored_items.begin(), scored_items.end(), |
215 ScoredHistoryMatch::MatchScoreGreater); | 216 ScoredHistoryMatch::MatchScoreGreater); |
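The select-then-sort step above follows the standard top-N pattern: std::partial_sort orders only the leading |max_matches| elements and leaves the rest unspecified, which is cheaper than a full sort when the pool is large. A minimal sketch with plain ints and std::greater standing in for ScoredHistoryMatch and MatchScoreGreater:

```cpp
#include <algorithm>
#include <functional>
#include <vector>

// Keep only the best |max_matches| scores, sorted descending. Only the kept
// prefix is sorted; the discarded tail never needs ordering.
std::vector<int> SelectTopMatches(std::vector<int> scores, size_t max_matches) {
  if (scores.size() > max_matches) {
    std::partial_sort(scores.begin(), scores.begin() + max_matches,
                      scores.end(), std::greater<int>());
    scores.resize(max_matches);
  } else {
    std::sort(scores.begin(), scores.end(), std::greater<int>());
  }
  return scores;
}
```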
(...skipping 229 matching lines...) | |
445 // search_term_cache_ | 446 // search_term_cache_ |
446 } | 447 } |
447 | 448 |
448 bool URLIndexPrivateData::Empty() const { | 449 bool URLIndexPrivateData::Empty() const { |
449 return history_info_map_.empty(); | 450 return history_info_map_.empty(); |
450 } | 451 } |
451 | 452 |
452 void URLIndexPrivateData::Clear() { | 453 void URLIndexPrivateData::Clear() { |
453 last_time_rebuilt_from_history_ = base::Time(); | 454 last_time_rebuilt_from_history_ = base::Time(); |
454 word_list_.clear(); | 455 word_list_.clear(); |
455 available_words_.clear(); | 456 available_words_ = {}; |
456 word_map_.clear(); | 457 word_map_.clear(); |
457 char_word_map_.clear(); | 458 char_word_map_.clear(); |
458 word_id_history_map_.clear(); | 459 word_id_history_map_.clear(); |
459 history_id_word_map_.clear(); | 460 history_id_word_map_.clear(); |
460 history_info_map_.clear(); | 461 history_info_map_.clear(); |
461 word_starts_map_.clear(); | 462 word_starts_map_.clear(); |
462 } | 463 } |
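The switch from available_words_.clear() to available_words_ = {} suggests available_words_ is now a container adaptor (later hunks use top()/pop()/push(), consistent with std::stack or std::priority_queue); adaptors expose no clear(), so assigning a default-constructed instance is the usual reset idiom. A small sketch under that assumption:

```cpp
#include <stack>

std::stack<unsigned int> available_words;

void ResetAvailableWords() {
  // Container adaptors have no clear(); assigning a freshly constructed
  // instance empties the stack in one statement.
  available_words = {};
}
```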
463 | 464 |
464 URLIndexPrivateData::~URLIndexPrivateData() {} | 465 URLIndexPrivateData::~URLIndexPrivateData() {} |
465 | 466 |
466 HistoryIDSet URLIndexPrivateData::HistoryIDSetFromWords( | 467 HistoryIDVector URLIndexPrivateData::HistoryIDsFromWords( |
467 const String16Vector& unsorted_words) { | 468 const String16Vector& unsorted_words) { |
469 // This histogram name reflects the historic name of this function. | |
468 SCOPED_UMA_HISTOGRAM_TIMER("Omnibox.HistoryQuickHistoryIDSetFromWords"); | 470 SCOPED_UMA_HISTOGRAM_TIMER("Omnibox.HistoryQuickHistoryIDSetFromWords"); |
469 // Break the terms down into individual terms (words), get the candidate | 471 // Break the terms down into individual terms (words), get the candidate |
470 // set for each term, and intersect each to get a final candidate list. | 472 // set for each term, and intersect each to get a final candidate list. |
471 // Note that a single 'term' from the user's perspective might be | 473 // Note that a single 'term' from the user's perspective might be |
472 // a string like "http://www.somewebsite.com" which, from our perspective, | 474 // a string like "http://www.somewebsite.com" which, from our perspective, |
473 // is four words: 'http', 'www', 'somewebsite', and 'com'. | 475 // is four words: 'http', 'www', 'somewebsite', and 'com'. |
474 HistoryIDSet history_id_set; | 476 HistoryIDVector history_ids; |
475 String16Vector words(unsorted_words); | 477 String16Vector words(unsorted_words); |
476 // Sort the words longest first, as longer words are likely to narrow down | 478 // Sort the words longest first, as longer words are likely to narrow down |
477 // the results quicker. Also, single character words are the most expensive | 479 // the results quicker. Also, single character words are the most expensive |
478 // to process so save them for last. | 480 // to process so save them for last. |
479 std::sort(words.begin(), words.end(), LengthGreater); | 481 std::sort(words.begin(), words.end(), LengthGreater); |
482 | |
483 // TODO(dyaroshev): write a generic algorithm(crbug.com/696167). | |
480 for (String16Vector::iterator iter = words.begin(); iter != words.end(); | 484 for (String16Vector::iterator iter = words.begin(); iter != words.end(); |
481 ++iter) { | 485 ++iter) { |
482 base::string16 uni_word = *iter; | 486 base::string16 uni_word = *iter; |
Peter Kasting
2017/02/28 00:33:54
Nit: Inline into next statement.
| |
483 HistoryIDSet term_history_set = HistoryIDsForTerm(uni_word); | 487 HistoryIDSet term_history_set = HistoryIDsForTerm(uni_word); |
484 if (term_history_set.empty()) { | 488 if (term_history_set.empty()) { |
485 history_id_set.clear(); | 489 history_ids.clear(); |
Peter Kasting
2017/02/28 00:33:54
Nit: Just "return HistoryIDVector();" rather than
| |
486 break; | 490 break; |
487 } | 491 } |
488 if (iter == words.begin()) { | 492 if (iter == words.begin()) { |
489 history_id_set.swap(term_history_set); | 493 history_ids = {term_history_set.begin(), term_history_set.end()}; |
490 } else { | 494 } else { |
491 HistoryIDSet new_history_id_set = base::STLSetIntersection<HistoryIDSet>( | 495 history_ids = base::STLSetIntersection<HistoryIDVector>(history_ids, |
492 history_id_set, term_history_set); | 496 term_history_set); |
493 history_id_set.swap(new_history_id_set); | |
494 } | 497 } |
495 } | 498 } |
496 return history_id_set; | 499 return history_ids; |
497 } | 500 } |
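HistoryIDsFromWords() narrows the candidate pool by intersecting each word's history-ID set into the running result, bailing out as soon as any word has no matches. A rough stand-alone sketch of that shape, using sorted std::vector<int64_t> and std::set_intersection in place of HistoryIDVector and base::STLSetIntersection:

```cpp
#include <algorithm>
#include <cstdint>
#include <iterator>
#include <vector>

// Intersect per-word candidate lists (each assumed sorted ascending).
std::vector<int64_t> IntersectCandidates(
    const std::vector<std::vector<int64_t>>& per_word_ids) {
  std::vector<int64_t> result;
  for (size_t i = 0; i < per_word_ids.size(); ++i) {
    if (per_word_ids[i].empty())
      return {};  // One empty term set empties the final result.
    if (i == 0) {
      result = per_word_ids[i];
      continue;
    }
    std::vector<int64_t> narrowed;
    std::set_intersection(result.begin(), result.end(),
                          per_word_ids[i].begin(), per_word_ids[i].end(),
                          std::back_inserter(narrowed));
    result.swap(narrowed);
  }
  return result;
}
```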
498 | 501 |
499 bool URLIndexPrivateData::TrimHistoryIdsPool( | 502 bool URLIndexPrivateData::TrimHistoryIdsPool( |
500 HistoryIDSet* history_id_set) const { | 503 HistoryIDVector* history_ids) const { |
501 constexpr size_t kItemsToScoreLimit = 500; | 504 constexpr size_t kItemsToScoreLimit = 500; |
502 if (history_id_set->size() <= kItemsToScoreLimit) | 505 if (history_ids->size() <= kItemsToScoreLimit) |
503 return false; | 506 return false; |
504 | 507 |
505 HistoryIDVector history_ids(history_id_set->begin(), history_id_set->end()); | |
506 | |
507 // Trim down the set by sorting by typed-count, visit-count, and last | 508 // Trim down the set by sorting by typed-count, visit-count, and last |
508 // visit. | 509 // visit. |
509 auto new_end = history_ids.begin() + kItemsToScoreLimit; | 510 auto new_end = history_ids->begin() + kItemsToScoreLimit; |
510 HistoryItemFactorGreater item_factor_functor(history_info_map_); | 511 HistoryItemFactorGreater item_factor_functor(history_info_map_); |
511 | 512 |
512 std::nth_element(history_ids.begin(), new_end, history_ids.end(), | 513 std::nth_element(history_ids->begin(), new_end, history_ids->end(), |
513 item_factor_functor); | 514 item_factor_functor); |
514 history_ids.erase(new_end, history_ids.end()); | 515 history_ids->erase(new_end, history_ids->end()); |
515 | 516 |
516 *history_id_set = {history_ids.begin(), history_ids.end()}; | |
517 return true; | 517 return true; |
518 } | 518 } |
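TrimHistoryIdsPool() only needs the best kItemsToScoreLimit candidates, not a full ordering, which is why std::nth_element (linear on average) is used instead of a sort before erasing the tail. A simplified sketch with ints and std::greater standing in for the history IDs and HistoryItemFactorGreater:

```cpp
#include <algorithm>
#include <functional>
#include <vector>

bool TrimPool(std::vector<int>* ids, size_t limit) {
  if (ids->size() <= limit)
    return false;
  auto new_end = ids->begin() + limit;
  // Partition so the |limit| "best" elements (by the comparator) come first,
  // in unspecified order, then drop everything after them.
  std::nth_element(ids->begin(), new_end, ids->end(), std::greater<int>());
  ids->erase(new_end, ids->end());
  return true;
}
```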
519 | 519 |
520 HistoryIDSet URLIndexPrivateData::HistoryIDsForTerm( | 520 HistoryIDSet URLIndexPrivateData::HistoryIDsForTerm( |
521 const base::string16& term) { | 521 const base::string16& term) { |
522 if (term.empty()) | 522 if (term.empty()) |
523 return HistoryIDSet(); | 523 return HistoryIDSet(); |
524 | 524 |
525 // TODO(mrossetti): Consider optimizing for very common terms such as | 525 // TODO(mrossetti): Consider optimizing for very common terms such as |
526 // 'http[s]', 'www', 'com', etc. Or collect the top 100 more frequently | 526 // 'http[s]', 'www', 'com', etc. Or collect the top 100 more frequently |
(...skipping 47 matching lines...) | |
574 | 574 |
575 // Reduce the word set with any leftover, unprocessed characters. | 575 // Reduce the word set with any leftover, unprocessed characters. |
576 if (!unique_chars.empty()) { | 576 if (!unique_chars.empty()) { |
577 WordIDSet leftover_set(WordIDSetForTermChars(unique_chars)); | 577 WordIDSet leftover_set(WordIDSetForTermChars(unique_chars)); |
578 // We might come up empty on the leftovers. | 578 // We might come up empty on the leftovers. |
579 if (leftover_set.empty()) { | 579 if (leftover_set.empty()) { |
580 search_term_cache_[term] = SearchTermCacheItem(); | 580 search_term_cache_[term] = SearchTermCacheItem(); |
581 return HistoryIDSet(); | 581 return HistoryIDSet(); |
582 } | 582 } |
583 // Or there may not have been a prefix from which to start. | 583 // Or there may not have been a prefix from which to start. |
584 if (prefix_chars.empty()) { | 584 word_id_set = prefix_chars.empty() ? std::move(leftover_set) |
585 word_id_set.swap(leftover_set); | 585 : base::STLSetIntersection<WordIDSet>( |
586 } else { | 586 word_id_set, leftover_set); |
587 WordIDSet new_word_id_set = base::STLSetIntersection<WordIDSet>( | |
588 word_id_set, leftover_set); | |
589 word_id_set.swap(new_word_id_set); | |
590 } | |
591 } | 587 } |
592 | 588 |
593 // We must filter the word list because the resulting word set surely | 589 // We must filter the word list because the resulting word set surely |
594 // contains words which do not have the search term as a proper subset. | 590 // contains words which do not have the search term as a proper subset. |
595 for (WordIDSet::iterator word_set_iter = word_id_set.begin(); | 591 for (WordIDSet::iterator word_set_iter = word_id_set.begin(); |
596 word_set_iter != word_id_set.end(); ) { | 592 word_set_iter != word_id_set.end(); ) { |
597 if (word_list_[*word_set_iter].find(term) == base::string16::npos) | 593 if (word_list_[*word_set_iter].find(term) == base::string16::npos) |
598 word_set_iter = word_id_set.erase(word_set_iter); | 594 word_set_iter = word_id_set.erase(word_set_iter); |
599 else | 595 else |
600 ++word_set_iter; | 596 ++word_set_iter; |
601 } | 597 } |
602 } else { | 598 } else { |
603 word_id_set = WordIDSetForTermChars(Char16SetFromString16(term)); | 599 word_id_set = WordIDSetForTermChars(Char16SetFromString16(term)); |
604 } | 600 } |
605 | 601 |
606 // If any words resulted then we can compose a set of history IDs by unioning | 602 // If any words resulted then we can compose a set of history IDs by unioning |
607 // the sets from each word. | 603 // the sets from each word. |
608 HistoryIDSet history_id_set; | 604 HistoryIDSet history_id_set; |
609 if (!word_id_set.empty()) { | 605 for (WordID word_id : word_id_set) { |
610 for (WordIDSet::iterator word_id_iter = word_id_set.begin(); | 606 WordIDHistoryMap::iterator word_iter = word_id_history_map_.find(word_id); |
611 word_id_iter != word_id_set.end(); ++word_id_iter) { | 607 if (word_iter != word_id_history_map_.end()) { |
612 WordID word_id = *word_id_iter; | 608 HistoryIDSet& word_history_id_set(word_iter->second); |
613 WordIDHistoryMap::iterator word_iter = word_id_history_map_.find(word_id); | 609 history_id_set.insert(word_history_id_set.begin(), |
614 if (word_iter != word_id_history_map_.end()) { | 610 word_history_id_set.end()); |
615 HistoryIDSet& word_history_id_set(word_iter->second); | |
616 history_id_set.insert(word_history_id_set.begin(), | |
617 word_history_id_set.end()); | |
618 } | |
619 } | 611 } |
620 } | 612 } |
621 | 613 |
622 // Record a new cache entry for this word if the term is longer than | 614 // Record a new cache entry for this word if the term is longer than |
623 // a single character. | 615 // a single character. |
624 if (term_length > 1) | 616 if (term_length > 1) |
625 search_term_cache_[term] = SearchTermCacheItem(word_id_set, history_id_set); | 617 search_term_cache_[term] = SearchTermCacheItem(word_id_set, history_id_set); |
626 | 618 |
627 return history_id_set; | 619 return history_id_set; |
628 } | 620 } |
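The closing loop of HistoryIDsForTerm() unions the history-ID set of every surviving word into one result. A compact illustration of that accumulation, with std::map/std::set standing in for WordIDHistoryMap and HistoryIDSet:

```cpp
#include <cstdint>
#include <map>
#include <set>

std::set<int64_t> UnionHistoryIds(
    const std::set<int>& word_ids,
    const std::map<int, std::set<int64_t>>& word_to_history) {
  std::set<int64_t> history_ids;
  for (int word_id : word_ids) {
    auto it = word_to_history.find(word_id);
    if (it != word_to_history.end())
      history_ids.insert(it->second.begin(), it->second.end());
  }
  return history_ids;
}
```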
629 | 621 |
630 WordIDSet URLIndexPrivateData::WordIDSetForTermChars( | 622 WordIDSet URLIndexPrivateData::WordIDSetForTermChars( |
631 const Char16Set& term_chars) { | 623 const Char16Set& term_chars) { |
624 // TODO(dyaroshev): write a generic algorithm(crbug.com/696167). | |
625 | |
632 WordIDSet word_id_set; | 626 WordIDSet word_id_set; |
633 for (Char16Set::const_iterator c_iter = term_chars.begin(); | 627 for (Char16Set::const_iterator c_iter = term_chars.begin(); |
634 c_iter != term_chars.end(); ++c_iter) { | 628 c_iter != term_chars.end(); ++c_iter) { |
635 CharWordIDMap::iterator char_iter = char_word_map_.find(*c_iter); | 629 CharWordIDMap::iterator char_iter = char_word_map_.find(*c_iter); |
636 if (char_iter == char_word_map_.end()) { | 630 if (char_iter == char_word_map_.end()) { |
637 // A character was not found so there are no matching results: bail. | 631 // A character was not found so there are no matching results: bail. |
638 word_id_set.clear(); | 632 word_id_set.clear(); |
Peter Kasting
2017/02/28 00:33:54
Nit: Same comment as above
| |
639 break; | 633 break; |
640 } | 634 } |
641 WordIDSet& char_word_id_set(char_iter->second); | 635 WordIDSet& char_word_id_set(char_iter->second); |
Peter Kasting
2017/02/28 00:33:54
Nit: Can be const
| |
642 // It is possible for there to no longer be any words associated with | 636 // It is possible for there to no longer be any words associated with |
643 // a particular character. Give up in that case. | 637 // a particular character. Give up in that case. |
644 if (char_word_id_set.empty()) { | 638 if (char_word_id_set.empty()) { |
645 word_id_set.clear(); | 639 word_id_set.clear(); |
646 break; | 640 break; |
647 } | 641 } |
648 | 642 |
649 if (c_iter == term_chars.begin()) { | 643 if (c_iter == term_chars.begin()) { |
650 // First character results becomes base set of results. | 644 // First character results becomes base set of results. |
Peter Kasting
2017/02/28 00:33:55
Nit: These comments just restate the code, remove.
| |
651 word_id_set = char_word_id_set; | 645 word_id_set = char_word_id_set; |
652 } else { | 646 } else { |
653 // Subsequent character results get intersected in. | 647 // Subsequent character results get intersected in. |
654 WordIDSet new_word_id_set = base::STLSetIntersection<WordIDSet>( | 648 word_id_set = |
655 word_id_set, char_word_id_set); | 649 base::STLSetIntersection<WordIDSet>(word_id_set, char_word_id_set); |
656 word_id_set.swap(new_word_id_set); | |
657 } | 650 } |
658 } | 651 } |
659 return word_id_set; | 652 return word_id_set; |
660 } | 653 } |
661 | 654 |
662 void URLIndexPrivateData::HistoryIdSetToScoredMatches( | 655 void URLIndexPrivateData::HistoryIdsToScoredMatches( |
663 HistoryIDSet history_id_set, | 656 HistoryIDVector history_ids, |
664 const base::string16& lower_raw_string, | 657 const base::string16& lower_raw_string, |
665 const TemplateURLService* template_url_service, | 658 const TemplateURLService* template_url_service, |
666 bookmarks::BookmarkModel* bookmark_model, | 659 bookmarks::BookmarkModel* bookmark_model, |
667 ScoredHistoryMatches* scored_items) const { | 660 ScoredHistoryMatches* scored_items) const { |
668 if (history_id_set.empty()) | 661 if (history_ids.empty()) |
669 return; | 662 return; |
670 | 663 |
671 // Break up the raw search string (complete with escaped URL elements) into | 664 // Break up the raw search string (complete with escaped URL elements) into |
672 // 'terms' (as opposed to 'words'; see comment in HistoryItemsForTerms()). | 665 // 'terms' (as opposed to 'words'; see comment in HistoryItemsForTerms()). |
673 // We only want to break up the search string on 'true' whitespace rather than | 666 // We only want to break up the search string on 'true' whitespace rather than |
674 // escaped whitespace. For example, when the user types | 667 // escaped whitespace. For example, when the user types |
675 // "colspec=ID%20Mstone Release" we get two 'terms': "colspec=id%20mstone" and | 668 // "colspec=ID%20Mstone Release" we get two 'terms': "colspec=id%20mstone" and |
676 // "release". | 669 // "release". |
677 String16Vector lower_raw_terms = | 670 String16Vector lower_raw_terms = |
678 base::SplitString(lower_raw_string, base::kWhitespaceUTF16, | 671 base::SplitString(lower_raw_string, base::kWhitespaceUTF16, |
679 base::KEEP_WHITESPACE, base::SPLIT_WANT_NONEMPTY); | 672 base::KEEP_WHITESPACE, base::SPLIT_WANT_NONEMPTY); |
680 | 673 |
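The 'terms' described above are produced by splitting only on literal whitespace, so an escaped space such as "%20" stays inside a single term. A tiny stand-alone illustration, with std::istringstream as a stand-in for base::SplitString with SPLIT_WANT_NONEMPTY:

```cpp
#include <sstream>
#include <string>
#include <vector>

std::vector<std::string> SplitTerms(const std::string& lower_raw_string) {
  std::vector<std::string> terms;
  std::istringstream stream(lower_raw_string);
  std::string term;
  while (stream >> term)  // Skips runs of whitespace; "%20" stays intact.
    terms.push_back(term);
  return terms;
}

// SplitTerms("colspec=id%20mstone release")
//     -> {"colspec=id%20mstone", "release"}
```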
681 // Don't score matches when there are no terms to score against. (It's | 674 // Don't score matches when there are no terms to score against. (It's |
682 // possible that the word break iterator that extracts words to search for in | 675 // possible that the word break iterator that extracts words to search for in |
683 // the database allows some whitespace "words" whereas SplitString excludes a | 676 // the database allows some whitespace "words" whereas SplitString excludes a |
684 // long list of whitespace.) One could write a scoring function that gives a | 677 // long list of whitespace.) One could write a scoring function that gives a |
685 // reasonable order to matches when there are no terms (i.e., all the words | 678 // reasonable order to matches when there are no terms (i.e., all the words |
686 // are some form of whitespace), but this is such a rare edge case that it's | 679 // are some form of whitespace), but this is such a rare edge case that it's |
687 // not worth the time. | 680 // not worth the time. |
688 if (lower_raw_terms.empty()) | 681 if (lower_raw_terms.empty()) |
689 return; | 682 return; |
690 | 683 |
691 WordStarts lower_terms_to_word_starts_offsets; | 684 WordStarts lower_terms_to_word_starts_offsets; |
692 CalculateWordStartsOffsets(lower_raw_terms, | 685 CalculateWordStartsOffsets(lower_raw_terms, |
693 &lower_terms_to_word_starts_offsets); | 686 &lower_terms_to_word_starts_offsets); |
694 | 687 |
695 // Filter bad matches and other matches we don't want to display. | 688 // Filter bad matches and other matches we don't want to display. |
696 for (auto it = history_id_set.begin();;) { | 689 auto filter = [this, template_url_service](const HistoryID history_id) { |
697 it = std::find_if(it, history_id_set.end(), | 690 return ShouldFilter(history_id, template_url_service); |
698 [this, template_url_service](const HistoryID history_id) { | 691 }; |
699 return ShouldFilter(history_id, template_url_service); | 692 history_ids.erase( |
700 }); | 693 std::remove_if(history_ids.begin(), history_ids.end(), filter), |
701 if (it == history_id_set.end()) | 694 history_ids.end()); |
702 break; | |
703 it = history_id_set.erase(it); | |
704 } | |
705 | 695 |
706 // Score the matches. | 696 // Score the matches. |
707 const size_t num_matches = history_id_set.size(); | 697 const size_t num_matches = history_ids.size(); |
708 const base::Time now = base::Time::Now(); | 698 const base::Time now = base::Time::Now(); |
709 std::transform( | |
710 history_id_set.begin(), history_id_set.end(), | |
711 std::back_inserter(*scored_items), [&](const HistoryID history_id) { | |
712 auto hist_pos = history_info_map_.find(history_id); | |
713 const history::URLRow& hist_item = hist_pos->second.url_row; | |
714 auto starts_pos = word_starts_map_.find(history_id); | |
715 DCHECK(starts_pos != word_starts_map_.end()); | |
716 return ScoredHistoryMatch( | |
717 hist_item, hist_pos->second.visits, lower_raw_string, | |
718 lower_raw_terms, lower_terms_to_word_starts_offsets, | |
719 starts_pos->second, | |
720 bookmark_model && bookmark_model->IsBookmarked(hist_item.url()), | |
721 num_matches, now); | |
722 }); | |
723 | 699 |
dyaroshev
2017/02/26 01:12:17
First transforming and then removing seems unneces
| |
724 // Filter all matches that ended up scoring 0. (These are usually matches | 700 for (HistoryID history_id : history_ids) { |
725 // which didn't match the user's raw terms.) | 701 auto hist_pos = history_info_map_.find(history_id); |
726 scored_items->erase(std::remove_if(scored_items->begin(), scored_items->end(), | 702 const history::URLRow& hist_item = hist_pos->second.url_row; |
727 [](const ScoredHistoryMatch& match) { | 703 auto starts_pos = word_starts_map_.find(history_id); |
728 return match.raw_score == 0; | 704 DCHECK(starts_pos != word_starts_map_.end()); |
729 }), | 705 ScoredHistoryMatch new_scored_match( |
730 scored_items->end()); | 706 hist_item, hist_pos->second.visits, lower_raw_string, lower_raw_terms, |
707 lower_terms_to_word_starts_offsets, starts_pos->second, | |
708 bookmark_model && bookmark_model->IsBookmarked(hist_item.url()), | |
709 num_matches, now); | |
710 // Filter new matches that ended up scoring 0. (These are usually matches | |
711 // which didn't match the user's raw terms.) | |
712 if (new_scored_match.raw_score != 0) | |
Peter Kasting
2017/02/28 00:33:54
Nit: > 0?
| |
713 scored_items->push_back(std::move(new_scored_match)); | |
714 } | |
731 } | 715 } |
732 | 716 |
733 // static | 717 // static |
734 void URLIndexPrivateData::CalculateWordStartsOffsets( | 718 void URLIndexPrivateData::CalculateWordStartsOffsets( |
735 const String16Vector& lower_terms, | 719 const String16Vector& lower_terms, |
736 WordStarts* lower_terms_to_word_starts_offsets) { | 720 WordStarts* lower_terms_to_word_starts_offsets) { |
737 // Calculate offsets for each term. For instance, the offset for | 721 // Calculate offsets for each term. For instance, the offset for |
738 // ".net" should be 1, indicating that the actual word-part of the term | 722 // ".net" should be 1, indicating that the actual word-part of the term |
739 // starts at offset 1. | 723 // starts at offset 1. |
740 lower_terms_to_word_starts_offsets->resize(lower_terms.size(), 0u); | 724 lower_terms_to_word_starts_offsets->resize(lower_terms.size(), 0u); |
(...skipping 71 matching lines...) | |
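The body of CalculateWordStartsOffsets() is elided in this diff, but the ".net" example in its comment boils down to counting leading non-word characters. A simplified, assumption-laden sketch of that idea (ASCII-only; the real code uses the same word-breaking rules as the indexer):

```cpp
#include <cctype>
#include <cstddef>
#include <string>

size_t WordStartOffset(const std::string& term) {
  size_t offset = 0;
  while (offset < term.size() &&
         !std::isalnum(static_cast<unsigned char>(term[offset])))
    ++offset;  // Skip leading punctuation such as '.'.
  return offset;  // ".net" -> 1, "net" -> 0.
}
```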
812 HistoryID history_id = static_cast<HistoryID>(row.id()); | 796 HistoryID history_id = static_cast<HistoryID>(row.id()); |
813 // Split URL into individual, unique words then add in the title words. | 797 // Split URL into individual, unique words then add in the title words. |
814 const GURL& gurl(row.url()); | 798 const GURL& gurl(row.url()); |
815 const base::string16& url = | 799 const base::string16& url = |
816 bookmarks::CleanUpUrlForMatching(gurl, nullptr); | 800 bookmarks::CleanUpUrlForMatching(gurl, nullptr); |
817 String16Set url_words = String16SetFromString16(url, | 801 String16Set url_words = String16SetFromString16(url, |
818 word_starts ? &word_starts->url_word_starts_ : nullptr); | 802 word_starts ? &word_starts->url_word_starts_ : nullptr); |
819 const base::string16& title = bookmarks::CleanUpTitleForMatching(row.title()); | 803 const base::string16& title = bookmarks::CleanUpTitleForMatching(row.title()); |
820 String16Set title_words = String16SetFromString16(title, | 804 String16Set title_words = String16SetFromString16(title, |
821 word_starts ? &word_starts->title_word_starts_ : nullptr); | 805 word_starts ? &word_starts->title_word_starts_ : nullptr); |
822 String16Set words = base::STLSetUnion<String16Set>(url_words, title_words); | 806 for (const auto& word : |
823 for (String16Set::iterator word_iter = words.begin(); | 807 base::STLSetUnion<String16Set>(url_words, title_words)) |
824 word_iter != words.end(); ++word_iter) | 808 AddWordToIndex(word, history_id); |
825 AddWordToIndex(*word_iter, history_id); | |
826 | 809 |
827 search_term_cache_.clear(); // Invalidate the term cache. | 810 search_term_cache_.clear(); // Invalidate the term cache. |
828 } | 811 } |
829 | 812 |
830 void URLIndexPrivateData::AddWordToIndex(const base::string16& term, | 813 void URLIndexPrivateData::AddWordToIndex(const base::string16& term, |
831 HistoryID history_id) { | 814 HistoryID history_id) { |
832 WordMap::iterator word_pos = word_map_.find(term); | 815 WordMap::iterator word_pos; |
833 if (word_pos != word_map_.end()) | 816 bool is_new; |
834 UpdateWordHistory(word_pos->second, history_id); | 817 std::tie(word_pos, is_new) = word_map_.emplace(term, WordID()); |
835 else | 818 |
836 AddWordHistory(term, history_id); | 819 // Adding a new word (i.e. a word that is not already in the word index). |
820 if (is_new) { | |
821 word_pos->second = AddNewWordToWordList(term); | |
822 | |
823 // For each character in the newly added word add the word to the character | |
824 // index. | |
825 for (base::char16 uni_char : Char16SetFromString16(term)) | |
826 char_word_map_[uni_char].insert(word_pos->second); | |
827 } | |
828 | |
829 word_id_history_map_[word_pos->second].insert(history_id); | |
830 history_id_word_map_[history_id].insert(word_pos->second); | |
837 } | 831 } |
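The rewritten AddWordToIndex() replaces the find-then-branch pattern with a single map::emplace, whose returned bool says whether the word was newly inserted. A minimal sketch of that idiom, with plain std types standing in for WordMap/WordID:

```cpp
#include <map>
#include <string>
#include <tuple>

std::map<std::string, int> word_map;

int GetOrAddWordId(const std::string& term, int next_id) {
  std::map<std::string, int>::iterator pos;
  bool is_new;
  // emplace() looks up and, only if absent, inserts in one step.
  std::tie(pos, is_new) = word_map.emplace(term, 0);
  if (is_new)
    pos->second = next_id;  // Assign a real ID only for a brand-new word.
  return pos->second;
}
```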
838 | 832 |
839 void URLIndexPrivateData::AddWordHistory(const base::string16& term, | 833 WordID URLIndexPrivateData::AddNewWordToWordList(const base::string16& term) { |
840 HistoryID history_id) { | |
841 WordID word_id = word_list_.size(); | 834 WordID word_id = word_list_.size(); |
842 if (available_words_.empty()) { | 835 if (available_words_.empty()) { |
843 word_list_.push_back(term); | 836 word_list_.push_back(term); |
844 } else { | 837 return word_id; |
845 word_id = *(available_words_.begin()); | |
846 word_list_[word_id] = term; | |
847 available_words_.erase(word_id); | |
848 } | 838 } |
849 word_map_[term] = word_id; | |
850 | 839 |
851 HistoryIDSet history_id_set; | 840 word_id = available_words_.top(); |
852 history_id_set.insert(history_id); | 841 available_words_.pop(); |
853 word_id_history_map_[word_id] = history_id_set; | 842 word_list_[word_id] = term; |
854 AddToHistoryIDWordMap(history_id, word_id); | 843 return word_id; |
855 | |
856 // For each character in the newly added word (i.e. a word that is not | |
857 // already in the word index), add the word to the character index. | |
858 Char16Set characters = Char16SetFromString16(term); | |
859 for (Char16Set::iterator uni_char_iter = characters.begin(); | |
860 uni_char_iter != characters.end(); ++uni_char_iter) { | |
861 base::char16 uni_char = *uni_char_iter; | |
862 CharWordIDMap::iterator char_iter = char_word_map_.find(uni_char); | |
863 if (char_iter != char_word_map_.end()) { | |
864 // Update existing entry in the char/word index. | |
865 WordIDSet& word_id_set(char_iter->second); | |
866 word_id_set.insert(word_id); | |
867 } else { | |
868 // Create a new entry in the char/word index. | |
869 WordIDSet word_id_set; | |
870 word_id_set.insert(word_id); | |
871 char_word_map_[uni_char] = word_id_set; | |
872 } | |
873 } | |
874 } | |
875 | |
876 void URLIndexPrivateData::UpdateWordHistory(WordID word_id, | |
877 HistoryID history_id) { | |
878 WordIDHistoryMap::iterator history_pos = word_id_history_map_.find(word_id); | |
879 DCHECK(history_pos != word_id_history_map_.end()); | |
880 HistoryIDSet& history_id_set(history_pos->second); | |
881 history_id_set.insert(history_id); | |
882 AddToHistoryIDWordMap(history_id, word_id); | |
883 } | |
884 | |
885 void URLIndexPrivateData::AddToHistoryIDWordMap(HistoryID history_id, | |
886 WordID word_id) { | |
887 HistoryIDWordMap::iterator iter = history_id_word_map_.find(history_id); | |
888 if (iter != history_id_word_map_.end()) { | |
889 WordIDSet& word_id_set(iter->second); | |
890 word_id_set.insert(word_id); | |
891 } else { | |
892 WordIDSet word_id_set; | |
893 word_id_set.insert(word_id); | |
894 history_id_word_map_[history_id] = word_id_set; | |
895 } | |
896 } | 844 } |
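AddNewWordToWordList() together with RemoveRowWordsFromIndex() implements a small free-list: slots in word_list_ freed by removed words are pushed onto available_words_ and reused before the vector grows. A self-contained sketch of that recycling scheme, with simplified types:

```cpp
#include <cstddef>
#include <stack>
#include <string>
#include <vector>

std::vector<std::string> word_list;
std::stack<size_t> available_words;

size_t AddWord(const std::string& term) {
  if (available_words.empty()) {
    word_list.push_back(term);   // No free slot: append a new one.
    return word_list.size() - 1;
  }
  size_t word_id = available_words.top();
  available_words.pop();
  word_list[word_id] = term;     // Reuse a previously freed slot.
  return word_id;
}

void RemoveWord(size_t word_id) {
  word_list[word_id].clear();
  available_words.push(word_id);  // Make the slot available again.
}
```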
897 | 845 |
898 void URLIndexPrivateData::RemoveRowFromIndex(const history::URLRow& row) { | 846 void URLIndexPrivateData::RemoveRowFromIndex(const history::URLRow& row) { |
899 RemoveRowWordsFromIndex(row); | 847 RemoveRowWordsFromIndex(row); |
900 HistoryID history_id = static_cast<HistoryID>(row.id()); | 848 HistoryID history_id = static_cast<HistoryID>(row.id()); |
901 history_info_map_.erase(history_id); | 849 history_info_map_.erase(history_id); |
902 word_starts_map_.erase(history_id); | 850 word_starts_map_.erase(history_id); |
903 } | 851 } |
904 | 852 |
905 void URLIndexPrivateData::RemoveRowWordsFromIndex(const history::URLRow& row) { | 853 void URLIndexPrivateData::RemoveRowWordsFromIndex(const history::URLRow& row) { |
906 // Remove the entries in history_id_word_map_ and word_id_history_map_ for | 854 // Remove the entries in history_id_word_map_ and word_id_history_map_ for |
907 // this row. | 855 // this row. |
908 HistoryID history_id = static_cast<HistoryID>(row.id()); | 856 HistoryID history_id = static_cast<HistoryID>(row.id()); |
909 WordIDSet word_id_set = history_id_word_map_[history_id]; | 857 WordIDSet word_id_set = history_id_word_map_[history_id]; |
910 history_id_word_map_.erase(history_id); | 858 history_id_word_map_.erase(history_id); |
911 | 859 |
912 // Reconcile any changes to word usage. | 860 // Reconcile any changes to word usage. |
913 for (WordIDSet::iterator word_id_iter = word_id_set.begin(); | 861 for (WordID word_id : word_id_set) { |
914 word_id_iter != word_id_set.end(); ++word_id_iter) { | 862 auto word_id_history_map_iter = word_id_history_map_.find(word_id); |
915 WordID word_id = *word_id_iter; | 863 DCHECK(word_id_history_map_iter != word_id_history_map_.end()); |
916 word_id_history_map_[word_id].erase(history_id); | 864 |
917 if (!word_id_history_map_[word_id].empty()) | 865 word_id_history_map_iter->second.erase(history_id); |
918 continue; // The word is still in use. | 866 if (!word_id_history_map_iter->second.empty()) |
867 continue; | |
919 | 868 |
920 // The word is no longer in use. Reconcile any changes to character usage. | 869 // The word is no longer in use. Reconcile any changes to character usage. |
921 base::string16 word = word_list_[word_id]; | 870 base::string16 word = word_list_[word_id]; |
922 Char16Set characters = Char16SetFromString16(word); | 871 for (base::char16 uni_char : Char16SetFromString16(word)) { |
923 for (Char16Set::iterator uni_char_iter = characters.begin(); | 872 auto char_word_map_iter = char_word_map_.find(uni_char); |
924 uni_char_iter != characters.end(); ++uni_char_iter) { | 873 char_word_map_iter->second.erase(word_id); |
925 base::char16 uni_char = *uni_char_iter; | 874 if (char_word_map_iter->second.empty()) |
926 char_word_map_[uni_char].erase(word_id); | 875 char_word_map_.erase(char_word_map_iter); |
927 if (char_word_map_[uni_char].empty()) | |
928 char_word_map_.erase(uni_char); // No longer in use. | |
929 } | 876 } |
930 | 877 |
931 // Complete the removal of references to the word. | 878 // Complete the removal of references to the word. |
932 word_id_history_map_.erase(word_id); | 879 word_id_history_map_.erase(word_id_history_map_iter); |
933 word_map_.erase(word); | 880 word_map_.erase(word); |
934 word_list_[word_id] = base::string16(); | 881 word_list_[word_id] = base::string16(); |
935 available_words_.insert(word_id); | 882 available_words_.push(word_id); |
936 } | 883 } |
937 } | 884 } |
938 | 885 |
939 void URLIndexPrivateData::ResetSearchTermCache() { | 886 void URLIndexPrivateData::ResetSearchTermCache() { |
940 for (SearchTermCacheMap::iterator iter = search_term_cache_.begin(); | 887 for (auto& item : search_term_cache_) |
941 iter != search_term_cache_.end(); ++iter) | 888 item.second.used_ = false; |
942 iter->second.used_ = false; | |
943 } | 889 } |
944 | 890 |
945 bool URLIndexPrivateData::SaveToFile(const base::FilePath& file_path) { | 891 bool URLIndexPrivateData::SaveToFile(const base::FilePath& file_path) { |
946 base::TimeTicks beginning_time = base::TimeTicks::Now(); | 892 base::TimeTicks beginning_time = base::TimeTicks::Now(); |
947 InMemoryURLIndexCacheItem index_cache; | 893 InMemoryURLIndexCacheItem index_cache; |
948 SavePrivateData(&index_cache); | 894 SavePrivateData(&index_cache); |
949 std::string data; | 895 std::string data; |
950 if (!index_cache.SerializeToString(&data)) { | 896 if (!index_cache.SerializeToString(&data)) { |
951 LOG(WARNING) << "Failed to serialize the InMemoryURLIndex cache."; | 897 LOG(WARNING) << "Failed to serialize the InMemoryURLIndex cache."; |
952 return false; | 898 return false; |
(...skipping 24 matching lines...) | |
977 SaveWordIDHistoryMap(cache); | 923 SaveWordIDHistoryMap(cache); |
978 SaveHistoryInfoMap(cache); | 924 SaveHistoryInfoMap(cache); |
979 SaveWordStartsMap(cache); | 925 SaveWordStartsMap(cache); |
980 } | 926 } |
981 | 927 |
982 void URLIndexPrivateData::SaveWordList(InMemoryURLIndexCacheItem* cache) const { | 928 void URLIndexPrivateData::SaveWordList(InMemoryURLIndexCacheItem* cache) const { |
983 if (word_list_.empty()) | 929 if (word_list_.empty()) |
984 return; | 930 return; |
985 WordListItem* list_item = cache->mutable_word_list(); | 931 WordListItem* list_item = cache->mutable_word_list(); |
986 list_item->set_word_count(word_list_.size()); | 932 list_item->set_word_count(word_list_.size()); |
987 for (String16Vector::const_iterator iter = word_list_.begin(); | 933 for (const base::string16& word : word_list_) |
988 iter != word_list_.end(); ++iter) | 934 list_item->add_word(base::UTF16ToUTF8(word)); |
989 list_item->add_word(base::UTF16ToUTF8(*iter)); | |
990 } | 935 } |
991 | 936 |
992 void URLIndexPrivateData::SaveWordMap(InMemoryURLIndexCacheItem* cache) const { | 937 void URLIndexPrivateData::SaveWordMap(InMemoryURLIndexCacheItem* cache) const { |
993 if (word_map_.empty()) | 938 if (word_map_.empty()) |
994 return; | 939 return; |
995 WordMapItem* map_item = cache->mutable_word_map(); | 940 WordMapItem* map_item = cache->mutable_word_map(); |
996 map_item->set_item_count(word_map_.size()); | 941 map_item->set_item_count(word_map_.size()); |
997 for (WordMap::const_iterator iter = word_map_.begin(); | 942 for (const auto& elem : word_map_) { |
998 iter != word_map_.end(); ++iter) { | |
999 WordMapEntry* map_entry = map_item->add_word_map_entry(); | 943 WordMapEntry* map_entry = map_item->add_word_map_entry(); |
1000 map_entry->set_word(base::UTF16ToUTF8(iter->first)); | 944 map_entry->set_word(base::UTF16ToUTF8(elem.first)); |
1001 map_entry->set_word_id(iter->second); | 945 map_entry->set_word_id(elem.second); |
1002 } | 946 } |
1003 } | 947 } |
1004 | 948 |
1005 void URLIndexPrivateData::SaveCharWordMap( | 949 void URLIndexPrivateData::SaveCharWordMap( |
1006 InMemoryURLIndexCacheItem* cache) const { | 950 InMemoryURLIndexCacheItem* cache) const { |
1007 if (char_word_map_.empty()) | 951 if (char_word_map_.empty()) |
1008 return; | 952 return; |
1009 CharWordMapItem* map_item = cache->mutable_char_word_map(); | 953 CharWordMapItem* map_item = cache->mutable_char_word_map(); |
1010 map_item->set_item_count(char_word_map_.size()); | 954 map_item->set_item_count(char_word_map_.size()); |
1011 for (CharWordIDMap::const_iterator iter = char_word_map_.begin(); | 955 for (const auto& entry : char_word_map_) { |
1012 iter != char_word_map_.end(); ++iter) { | |
1013 CharWordMapEntry* map_entry = map_item->add_char_word_map_entry(); | 956 CharWordMapEntry* map_entry = map_item->add_char_word_map_entry(); |
1014 map_entry->set_char_16(iter->first); | 957 map_entry->set_char_16(entry.first); |
1015 const WordIDSet& word_id_set(iter->second); | 958 const WordIDSet& word_id_set = entry.second; |
1016 map_entry->set_item_count(word_id_set.size()); | 959 map_entry->set_item_count(word_id_set.size()); |
1017 for (WordIDSet::const_iterator set_iter = word_id_set.begin(); | 960 for (WordID word_id : word_id_set) |
1018 set_iter != word_id_set.end(); ++set_iter) | 961 map_entry->add_word_id(word_id); |
1019 map_entry->add_word_id(*set_iter); | |
1020 } | 962 } |
1021 } | 963 } |
1022 | 964 |
1023 void URLIndexPrivateData::SaveWordIDHistoryMap( | 965 void URLIndexPrivateData::SaveWordIDHistoryMap( |
1024 InMemoryURLIndexCacheItem* cache) const { | 966 InMemoryURLIndexCacheItem* cache) const { |
1025 if (word_id_history_map_.empty()) | 967 if (word_id_history_map_.empty()) |
1026 return; | 968 return; |
1027 WordIDHistoryMapItem* map_item = cache->mutable_word_id_history_map(); | 969 WordIDHistoryMapItem* map_item = cache->mutable_word_id_history_map(); |
1028 map_item->set_item_count(word_id_history_map_.size()); | 970 map_item->set_item_count(word_id_history_map_.size()); |
1029 for (WordIDHistoryMap::const_iterator iter = word_id_history_map_.begin(); | 971 for (const auto& entry : word_id_history_map_) { |
1030 iter != word_id_history_map_.end(); ++iter) { | |
1031 WordIDHistoryMapEntry* map_entry = | 972 WordIDHistoryMapEntry* map_entry = |
1032 map_item->add_word_id_history_map_entry(); | 973 map_item->add_word_id_history_map_entry(); |
1033 map_entry->set_word_id(iter->first); | 974 map_entry->set_word_id(entry.first); |
1034 const HistoryIDSet& history_id_set(iter->second); | 975 const HistoryIDSet& history_id_set = entry.second; |
1035 map_entry->set_item_count(history_id_set.size()); | 976 map_entry->set_item_count(history_id_set.size()); |
1036 for (HistoryIDSet::const_iterator set_iter = history_id_set.begin(); | 977 for (HistoryID history_id : history_id_set) |
1037 set_iter != history_id_set.end(); ++set_iter) | 978 map_entry->add_history_id(history_id); |
1038 map_entry->add_history_id(*set_iter); | |
1039 } | 979 } |
1040 } | 980 } |
1041 | 981 |
1042 void URLIndexPrivateData::SaveHistoryInfoMap( | 982 void URLIndexPrivateData::SaveHistoryInfoMap( |
1043 InMemoryURLIndexCacheItem* cache) const { | 983 InMemoryURLIndexCacheItem* cache) const { |
1044 if (history_info_map_.empty()) | 984 if (history_info_map_.empty()) |
1045 return; | 985 return; |
1046 HistoryInfoMapItem* map_item = cache->mutable_history_info_map(); | 986 HistoryInfoMapItem* map_item = cache->mutable_history_info_map(); |
1047 map_item->set_item_count(history_info_map_.size()); | 987 map_item->set_item_count(history_info_map_.size()); |
1048 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin(); | 988 for (const auto& entry : history_info_map_) { |
1049 iter != history_info_map_.end(); ++iter) { | |
1050 HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry(); | 989 HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry(); |
1051 map_entry->set_history_id(iter->first); | 990 map_entry->set_history_id(entry.first); |
1052 const history::URLRow& url_row(iter->second.url_row); | 991 const history::URLRow& url_row = entry.second.url_row; |
1053 // Note: We only save information that contributes to the index so there | 992 // Note: We only save information that contributes to the index so there |
1054 // is no need to save search_term_cache_ (not persistent). | 993 // is no need to save search_term_cache_ (not persistent). |
1055 map_entry->set_visit_count(url_row.visit_count()); | 994 map_entry->set_visit_count(url_row.visit_count()); |
1056 map_entry->set_typed_count(url_row.typed_count()); | 995 map_entry->set_typed_count(url_row.typed_count()); |
1057 map_entry->set_last_visit(url_row.last_visit().ToInternalValue()); | 996 map_entry->set_last_visit(url_row.last_visit().ToInternalValue()); |
1058 map_entry->set_url(url_row.url().spec()); | 997 map_entry->set_url(url_row.url().spec()); |
1059 map_entry->set_title(base::UTF16ToUTF8(url_row.title())); | 998 map_entry->set_title(base::UTF16ToUTF8(url_row.title())); |
1060 const VisitInfoVector& visits(iter->second.visits); | 999 for (const auto& visit : entry.second.visits) { |
1061 for (VisitInfoVector::const_iterator visit_iter = visits.begin(); | |
1062 visit_iter != visits.end(); ++visit_iter) { | |
1063 HistoryInfoMapEntry_VisitInfo* visit_info = map_entry->add_visits(); | 1000 HistoryInfoMapEntry_VisitInfo* visit_info = map_entry->add_visits(); |
1064 visit_info->set_visit_time(visit_iter->first.ToInternalValue()); | 1001 visit_info->set_visit_time(visit.first.ToInternalValue()); |
1065 visit_info->set_transition_type(visit_iter->second); | 1002 visit_info->set_transition_type(visit.second); |
1066 } | 1003 } |
1067 } | 1004 } |
1068 } | 1005 } |
1069 | 1006 |
1070 void URLIndexPrivateData::SaveWordStartsMap( | 1007 void URLIndexPrivateData::SaveWordStartsMap( |
1071 InMemoryURLIndexCacheItem* cache) const { | 1008 InMemoryURLIndexCacheItem* cache) const { |
1072 if (word_starts_map_.empty()) | 1009 if (word_starts_map_.empty()) |
1073 return; | 1010 return; |
1074 // For unit testing: Enable saving of the cache as an earlier version to | 1011 // For unit testing: Enable saving of the cache as an earlier version to |
1075 // allow testing of cache file upgrading in ReadFromFile(). | 1012 // allow testing of cache file upgrading in ReadFromFile(). |
1076 // TODO(mrossetti): Instead of intruding on production code with this kind of | 1013 // TODO(mrossetti): Instead of intruding on production code with this kind of |
1077 // test harness, save a copy of an older version cache with known results. | 1014 // test harness, save a copy of an older version cache with known results. |
1078 // Implement this when switching the caching over to SQLite. | 1015 // Implement this when switching the caching over to SQLite. |
1079 if (saved_cache_version_ < 1) | 1016 if (saved_cache_version_ < 1) |
1080 return; | 1017 return; |
1081 | 1018 |
1082 WordStartsMapItem* map_item = cache->mutable_word_starts_map(); | 1019 WordStartsMapItem* map_item = cache->mutable_word_starts_map(); |
1083 map_item->set_item_count(word_starts_map_.size()); | 1020 map_item->set_item_count(word_starts_map_.size()); |
1084 for (WordStartsMap::const_iterator iter = word_starts_map_.begin(); | 1021 for (const auto& entry : word_starts_map_) { |
1085 iter != word_starts_map_.end(); ++iter) { | |
1086 WordStartsMapEntry* map_entry = map_item->add_word_starts_map_entry(); | 1022 WordStartsMapEntry* map_entry = map_item->add_word_starts_map_entry(); |
1087 map_entry->set_history_id(iter->first); | 1023 map_entry->set_history_id(entry.first); |
1088 const RowWordStarts& word_starts(iter->second); | 1024 const RowWordStarts& word_starts = entry.second; |
1089 for (WordStarts::const_iterator i = word_starts.url_word_starts_.begin(); | 1025 for (auto url_word_start : word_starts.url_word_starts_) |
1090 i != word_starts.url_word_starts_.end(); ++i) | 1026 map_entry->add_url_word_starts(url_word_start); |
1091 map_entry->add_url_word_starts(*i); | 1027 for (auto title_word_start : word_starts.title_word_starts_) |
1092 for (WordStarts::const_iterator i = word_starts.title_word_starts_.begin(); | 1028 map_entry->add_title_word_starts(title_word_start); |
1093 i != word_starts.title_word_starts_.end(); ++i) | |
1094 map_entry->add_title_word_starts(*i); | |
1095 } | 1029 } |
1096 } | 1030 } |
1097 | 1031 |
1098 bool URLIndexPrivateData::RestorePrivateData( | 1032 bool URLIndexPrivateData::RestorePrivateData( |
1099 const InMemoryURLIndexCacheItem& cache) { | 1033 const InMemoryURLIndexCacheItem& cache) { |
1100 last_time_rebuilt_from_history_ = | 1034 last_time_rebuilt_from_history_ = |
1101 base::Time::FromInternalValue(cache.last_rebuild_timestamp()); | 1035 base::Time::FromInternalValue(cache.last_rebuild_timestamp()); |
1102 const base::TimeDelta rebuilt_ago = | 1036 const base::TimeDelta rebuilt_ago = |
1103 base::Time::Now() - last_time_rebuilt_from_history_; | 1037 base::Time::Now() - last_time_rebuilt_from_history_; |
1104 if ((rebuilt_ago > base::TimeDelta::FromDays(7)) || | 1038 if ((rebuilt_ago > base::TimeDelta::FromDays(7)) || |
(...skipping 21 matching lines...) | |
1126 | 1060 |
1127 bool URLIndexPrivateData::RestoreWordList( | 1061 bool URLIndexPrivateData::RestoreWordList( |
1128 const InMemoryURLIndexCacheItem& cache) { | 1062 const InMemoryURLIndexCacheItem& cache) { |
1129 if (!cache.has_word_list()) | 1063 if (!cache.has_word_list()) |
1130 return false; | 1064 return false; |
1131 const WordListItem& list_item(cache.word_list()); | 1065 const WordListItem& list_item(cache.word_list()); |
1132 uint32_t expected_item_count = list_item.word_count(); | 1066 uint32_t expected_item_count = list_item.word_count(); |
1133 uint32_t actual_item_count = list_item.word_size(); | 1067 uint32_t actual_item_count = list_item.word_size(); |
1134 if (actual_item_count == 0 || actual_item_count != expected_item_count) | 1068 if (actual_item_count == 0 || actual_item_count != expected_item_count) |
1135 return false; | 1069 return false; |
1136 const RepeatedPtrField<std::string>& words(list_item.word()); | 1070 const RepeatedPtrField<std::string>& words = list_item.word(); |
1137 for (RepeatedPtrField<std::string>::const_iterator iter = words.begin(); | 1071 word_list_.reserve(words.size()); |
1138 iter != words.end(); ++iter) | 1072 std::transform( |
1139 word_list_.push_back(base::UTF8ToUTF16(*iter)); | 1073 words.begin(), words.end(), std::back_inserter(word_list_), |
1074 [](const std::string& word) { return base::UTF8ToUTF16(word); }); | |
1140 return true; | 1075 return true; |
1141 } | 1076 } |
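RestoreWordList() now reserves capacity and fills word_list_ with std::transform plus std::back_inserter rather than an explicit loop. A generic sketch of that pattern, with a placeholder char-widening conversion standing in for base::UTF8ToUTF16:

```cpp
#include <algorithm>
#include <iterator>
#include <string>
#include <vector>

std::vector<std::u16string> RestoreWords(
    const std::vector<std::string>& cached_words) {
  std::vector<std::u16string> words;
  words.reserve(cached_words.size());
  std::transform(cached_words.begin(), cached_words.end(),
                 std::back_inserter(words), [](const std::string& word) {
                   // Placeholder conversion; real code decodes UTF-8.
                   return std::u16string(word.begin(), word.end());
                 });
  return words;
}
```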
1142 | 1077 |
1143 bool URLIndexPrivateData::RestoreWordMap( | 1078 bool URLIndexPrivateData::RestoreWordMap( |
1144 const InMemoryURLIndexCacheItem& cache) { | 1079 const InMemoryURLIndexCacheItem& cache) { |
1145 if (!cache.has_word_map()) | 1080 if (!cache.has_word_map()) |
1146 return false; | 1081 return false; |
1147 const WordMapItem& list_item(cache.word_map()); | 1082 const WordMapItem& list_item = cache.word_map(); |
1148 uint32_t expected_item_count = list_item.item_count(); | 1083 uint32_t expected_item_count = list_item.item_count(); |
1149 uint32_t actual_item_count = list_item.word_map_entry_size(); | 1084 uint32_t actual_item_count = list_item.word_map_entry_size(); |
1150 if (actual_item_count == 0 || actual_item_count != expected_item_count) | 1085 if (actual_item_count == 0 || actual_item_count != expected_item_count) |
1151 return false; | 1086 return false; |
1152 const RepeatedPtrField<WordMapEntry>& entries(list_item.word_map_entry()); | 1087 for (const auto& entry : list_item.word_map_entry()) |
1153 for (RepeatedPtrField<WordMapEntry>::const_iterator iter = entries.begin(); | 1088 word_map_[base::UTF8ToUTF16(entry.word())] = entry.word_id(); |
1154 iter != entries.end(); ++iter) | 1089 |
1155 word_map_[base::UTF8ToUTF16(iter->word())] = iter->word_id(); | |
1156 return true; | 1090 return true; |
1157 } | 1091 } |
1158 | 1092 |
1159 bool URLIndexPrivateData::RestoreCharWordMap( | 1093 bool URLIndexPrivateData::RestoreCharWordMap( |
1160 const InMemoryURLIndexCacheItem& cache) { | 1094 const InMemoryURLIndexCacheItem& cache) { |
1161 if (!cache.has_char_word_map()) | 1095 if (!cache.has_char_word_map()) |
1162 return false; | 1096 return false; |
1163 const CharWordMapItem& list_item(cache.char_word_map()); | 1097 const CharWordMapItem& list_item(cache.char_word_map()); |
1164 uint32_t expected_item_count = list_item.item_count(); | 1098 uint32_t expected_item_count = list_item.item_count(); |
1165 uint32_t actual_item_count = list_item.char_word_map_entry_size(); | 1099 uint32_t actual_item_count = list_item.char_word_map_entry_size(); |
1166 if (actual_item_count == 0 || actual_item_count != expected_item_count) | 1100 if (actual_item_count == 0 || actual_item_count != expected_item_count) |
1167 return false; | 1101 return false; |
1168 const RepeatedPtrField<CharWordMapEntry>& | 1102 |
1169 entries(list_item.char_word_map_entry()); | 1103 for (const auto& entry : list_item.char_word_map_entry()) { |
1170 for (RepeatedPtrField<CharWordMapEntry>::const_iterator iter = | 1104 expected_item_count = entry.item_count(); |
1171 entries.begin(); iter != entries.end(); ++iter) { | 1105 actual_item_count = entry.word_id_size(); |
1172 expected_item_count = iter->item_count(); | |
1173 actual_item_count = iter->word_id_size(); | |
1174 if (actual_item_count == 0 || actual_item_count != expected_item_count) | 1106 if (actual_item_count == 0 || actual_item_count != expected_item_count) |
1175 return false; | 1107 return false; |
1176 base::char16 uni_char = static_cast<base::char16>(iter->char_16()); | 1108 base::char16 uni_char = static_cast<base::char16>(entry.char_16()); |
1177 WordIDSet word_id_set; | 1109 const RepeatedField<int32_t>& word_ids = entry.word_id(); |
1178 const RepeatedField<int32_t>& word_ids(iter->word_id()); | 1110 char_word_map_[uni_char] = {word_ids.begin(), word_ids.end()}; |
1179 for (RepeatedField<int32_t>::const_iterator jiter = word_ids.begin(); | |
1180 jiter != word_ids.end(); ++jiter) | |
1181 word_id_set.insert(*jiter); | |
1182 char_word_map_[uni_char] = word_id_set; | |
1183 } | 1111 } |
1184 return true; | 1112 return true; |
1185 } | 1113 } |
1186 | 1114 |
1187 bool URLIndexPrivateData::RestoreWordIDHistoryMap( | 1115 bool URLIndexPrivateData::RestoreWordIDHistoryMap( |
1188 const InMemoryURLIndexCacheItem& cache) { | 1116 const InMemoryURLIndexCacheItem& cache) { |
1189 if (!cache.has_word_id_history_map()) | 1117 if (!cache.has_word_id_history_map()) |
1190 return false; | 1118 return false; |
1191 const WordIDHistoryMapItem& list_item(cache.word_id_history_map()); | 1119 const WordIDHistoryMapItem& list_item(cache.word_id_history_map()); |
1192 uint32_t expected_item_count = list_item.item_count(); | 1120 uint32_t expected_item_count = list_item.item_count(); |
1193 uint32_t actual_item_count = list_item.word_id_history_map_entry_size(); | 1121 uint32_t actual_item_count = list_item.word_id_history_map_entry_size(); |
1194 if (actual_item_count == 0 || actual_item_count != expected_item_count) | 1122 if (actual_item_count == 0 || actual_item_count != expected_item_count) |
1195 return false; | 1123 return false; |
1196 const RepeatedPtrField<WordIDHistoryMapEntry>& | 1124 for (const auto& entry : list_item.word_id_history_map_entry()) { |
1197 entries(list_item.word_id_history_map_entry()); | 1125 expected_item_count = entry.item_count(); |
1198 for (RepeatedPtrField<WordIDHistoryMapEntry>::const_iterator iter = | 1126 actual_item_count = entry.history_id_size(); |
1199 entries.begin(); iter != entries.end(); ++iter) { | |
1200 expected_item_count = iter->item_count(); | |
1201 actual_item_count = iter->history_id_size(); | |
1202 if (actual_item_count == 0 || actual_item_count != expected_item_count) | 1127 if (actual_item_count == 0 || actual_item_count != expected_item_count) |
1203 return false; | 1128 return false; |
1204 WordID word_id = iter->word_id(); | 1129 WordID word_id = entry.word_id(); |
1205 HistoryIDSet history_id_set; | 1130 const RepeatedField<int64_t>& history_ids = entry.history_id(); |
1206 const RepeatedField<int64_t>& history_ids(iter->history_id()); | 1131 word_id_history_map_[word_id] = {history_ids.begin(), history_ids.end()}; |
1207 for (RepeatedField<int64_t>::const_iterator jiter = history_ids.begin(); | 1132 for (HistoryID history_id : history_ids) |
1208 jiter != history_ids.end(); ++jiter) { | 1133 history_id_word_map_[history_id].insert(word_id); |
1209 history_id_set.insert(*jiter); | |
1210 AddToHistoryIDWordMap(*jiter, word_id); | |
1211 } | |
1212 word_id_history_map_[word_id] = history_id_set; | |
1213 } | 1134 } |
1214 return true; | 1135 return true; |
1215 } | 1136 } |
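This hunk also folds the old AddToHistoryIDWordMap() helper call into a direct insert on history_id_word_map_, so the forward and reverse maps are populated in the same loop. A rough, self-contained sketch of that two-map update; the typedefs below are stand-ins only, since the real WordID, HistoryID, and map types are declared elsewhere in the omnibox headers:

#include <cstdint>
#include <map>
#include <set>
#include <vector>

// Stand-in typedefs; the real ones live in the omnibox headers.
using WordID = uint32_t;
using HistoryID = int64_t;
using HistoryIDSet = std::set<HistoryID>;
using WordIDSet = std::set<WordID>;

int main() {
  std::map<WordID, HistoryIDSet> word_id_history_map;
  std::map<HistoryID, WordIDSet> history_id_word_map;

  WordID word_id = 7;
  std::vector<int64_t> history_ids = {10, 11, 12};  // entry.history_id()

  // Forward map: one range construction per cache entry.
  word_id_history_map[word_id] = {history_ids.begin(), history_ids.end()};

  // Reverse map: inline insertion replaces the former helper call,
  // one insert per history ID.
  for (HistoryID history_id : history_ids)
    history_id_word_map[history_id].insert(word_id);

  return history_id_word_map.size() == 3 ? 0 : 1;
}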
1216 | 1137 |
1217 bool URLIndexPrivateData::RestoreHistoryInfoMap( | 1138 bool URLIndexPrivateData::RestoreHistoryInfoMap( |
1218 const InMemoryURLIndexCacheItem& cache) { | 1139 const InMemoryURLIndexCacheItem& cache) { |
1219 if (!cache.has_history_info_map()) | 1140 if (!cache.has_history_info_map()) |
1220 return false; | 1141 return false; |
1221 const HistoryInfoMapItem& list_item(cache.history_info_map()); | 1142 const HistoryInfoMapItem& list_item(cache.history_info_map()); |
1222 uint32_t expected_item_count = list_item.item_count(); | 1143 uint32_t expected_item_count = list_item.item_count(); |
1223 uint32_t actual_item_count = list_item.history_info_map_entry_size(); | 1144 uint32_t actual_item_count = list_item.history_info_map_entry_size(); |
1224 if (actual_item_count == 0 || actual_item_count != expected_item_count) | 1145 if (actual_item_count == 0 || actual_item_count != expected_item_count) |
1225 return false; | 1146 return false; |
1226 const RepeatedPtrField<HistoryInfoMapEntry>& | 1147 |
1227 entries(list_item.history_info_map_entry()); | 1148 for (const auto& entry : list_item.history_info_map_entry()) { |
1228 for (RepeatedPtrField<HistoryInfoMapEntry>::const_iterator iter = | 1149 HistoryID history_id = entry.history_id(); |
1229 entries.begin(); iter != entries.end(); ++iter) { | 1150 history::URLRow url_row(GURL(entry.url()), history_id); |
1230 HistoryID history_id = iter->history_id(); | 1151 url_row.set_visit_count(entry.visit_count()); |
1231 GURL url(iter->url()); | 1152 url_row.set_typed_count(entry.typed_count()); |
1232 history::URLRow url_row(url, history_id); | 1153 url_row.set_last_visit(base::Time::FromInternalValue(entry.last_visit())); |
1233 url_row.set_visit_count(iter->visit_count()); | 1154 if (entry.has_title()) |
1234 url_row.set_typed_count(iter->typed_count()); | 1155 url_row.set_title(base::UTF8ToUTF16(entry.title())); |
1235 url_row.set_last_visit(base::Time::FromInternalValue(iter->last_visit())); | 1156 history_info_map_[history_id].url_row = std::move(url_row); |
1236 if (iter->has_title()) { | |
1237 base::string16 title(base::UTF8ToUTF16(iter->title())); | |
1238 url_row.set_title(title); | |
1239 } | |
1240 history_info_map_[history_id].url_row = url_row; | |
1241 | 1157 |
1242 // Restore visits list. | 1158 // Restore visits list. |
1243 VisitInfoVector visits; | 1159 VisitInfoVector visits; |
1244 visits.reserve(iter->visits_size()); | 1160 visits.reserve(entry.visits_size()); |
1245 for (int i = 0; i < iter->visits_size(); ++i) { | 1161 for (const auto& entry_visit : entry.visits()) { |
1246 visits.push_back(std::make_pair( | 1162 visits.emplace_back( |
1247 base::Time::FromInternalValue(iter->visits(i).visit_time()), | 1163 base::Time::FromInternalValue(entry_visit.visit_time()), |
1248 ui::PageTransitionFromInt(iter->visits(i).transition_type()))); | 1164 ui::PageTransitionFromInt(entry_visit.transition_type())); |
1249 } | 1165 } |
1250 history_info_map_[history_id].visits = visits; | 1166 history_info_map_[history_id].visits = std::move(visits); |
1251 } | 1167 } |
1252 return true; | 1168 return true; |
1253 } | 1169 } |
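The visit-list restoration now uses emplace_back() rather than push_back(std::make_pair(...)), constructing each (visit time, transition) pair in place. A self-contained sketch of that change, with plain integers standing in for base::Time and ui::PageTransition so it compiles on its own:

#include <cstdint>
#include <utility>
#include <vector>

int main() {
  // Stand-in for VisitInfoVector: (visit time, transition type) pairs.
  std::vector<std::pair<int64_t, int>> visits;
  visits.reserve(2);

  // emplace_back forwards its arguments to the pair constructor directly,
  // avoiding the intermediate std::make_pair temporary.
  visits.emplace_back(1234567890LL, 0);
  visits.emplace_back(1234567999LL, 1);

  return visits.size() == 2 ? 0 : 1;
}

The subsequent std::move(visits) into history_info_map_ then hands the vector over without a second copy.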
1254 | 1170 |
1255 bool URLIndexPrivateData::RestoreWordStartsMap( | 1171 bool URLIndexPrivateData::RestoreWordStartsMap( |
1256 const InMemoryURLIndexCacheItem& cache) { | 1172 const InMemoryURLIndexCacheItem& cache) { |
1257 // Note that this function must be called after RestoreHistoryInfoMap() has | 1173 // Note that this function must be called after RestoreHistoryInfoMap() has |
1258 // been run, as the word starts may have to be recalculated from the URLs and | 1174 // been run, as the word starts may have to be recalculated from the URLs and |
1259 // page titles. | 1175 // page titles. |
1260 if (cache.has_word_starts_map()) { | 1176 if (cache.has_word_starts_map()) { |
1261 const WordStartsMapItem& list_item(cache.word_starts_map()); | 1177 const WordStartsMapItem& list_item(cache.word_starts_map()); |
1262 uint32_t expected_item_count = list_item.item_count(); | 1178 uint32_t expected_item_count = list_item.item_count(); |
1263 uint32_t actual_item_count = list_item.word_starts_map_entry_size(); | 1179 uint32_t actual_item_count = list_item.word_starts_map_entry_size(); |
1264 if (actual_item_count == 0 || actual_item_count != expected_item_count) | 1180 if (actual_item_count == 0 || actual_item_count != expected_item_count) |
1265 return false; | 1181 return false; |
1266 const RepeatedPtrField<WordStartsMapEntry>& | 1182 for (const auto& entry : list_item.word_starts_map_entry()) { |
1267 entries(list_item.word_starts_map_entry()); | 1183 HistoryID history_id = entry.history_id(); |
1268 for (RepeatedPtrField<WordStartsMapEntry>::const_iterator iter = | |
1269 entries.begin(); iter != entries.end(); ++iter) { | |
1270 HistoryID history_id = iter->history_id(); | |
1271 RowWordStarts word_starts; | 1184 RowWordStarts word_starts; |
1272 // Restore the URL word starts. | 1185 // Restore the URL word starts. |
1273 const RepeatedField<int32_t>& url_starts(iter->url_word_starts()); | 1186 const RepeatedField<int32_t>& url_starts = entry.url_word_starts(); |
1274 for (RepeatedField<int32_t>::const_iterator jiter = url_starts.begin(); | 1187 word_starts.url_word_starts_ = {url_starts.begin(), url_starts.end()}; |
1275 jiter != url_starts.end(); ++jiter) | 1188 |
1276 word_starts.url_word_starts_.push_back(*jiter); | |
1277 // Restore the page title word starts. | 1189 // Restore the page title word starts. |
1278 const RepeatedField<int32_t>& title_starts(iter->title_word_starts()); | 1190 const RepeatedField<int32_t>& title_starts = entry.title_word_starts(); |
1279 for (RepeatedField<int32_t>::const_iterator jiter = title_starts.begin(); | 1191 word_starts.title_word_starts_ = {title_starts.begin(), |
1280 jiter != title_starts.end(); ++jiter) | 1192 title_starts.end()}; |
1281 word_starts.title_word_starts_.push_back(*jiter); | 1193 |
1282 word_starts_map_[history_id] = word_starts; | 1194 word_starts_map_[history_id] = std::move(word_starts); |
1283 } | 1195 } |
1284 } else { | 1196 } else { |
1285 // Since the cache did not contain any word starts, we must rebuild them from | 1197 // Since the cache did not contain any word starts, we must rebuild them from |
1286 // the URL and page titles. | 1198 // the URL and page titles. |
1287 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin(); | 1199 for (const auto& entry : history_info_map_) { |
1288 iter != history_info_map_.end(); ++iter) { | |
1289 RowWordStarts word_starts; | 1200 RowWordStarts word_starts; |
1290 const history::URLRow& row(iter->second.url_row); | 1201 const history::URLRow& row = entry.second.url_row; |
1291 const base::string16& url = | 1202 const base::string16& url = |
1292 bookmarks::CleanUpUrlForMatching(row.url(), nullptr); | 1203 bookmarks::CleanUpUrlForMatching(row.url(), nullptr); |
1293 String16VectorFromString16(url, false, &word_starts.url_word_starts_); | 1204 String16VectorFromString16(url, false, &word_starts.url_word_starts_); |
1294 const base::string16& title = | 1205 const base::string16& title = |
1295 bookmarks::CleanUpTitleForMatching(row.title()); | 1206 bookmarks::CleanUpTitleForMatching(row.title()); |
1296 String16VectorFromString16(title, false, &word_starts.title_word_starts_); | 1207 String16VectorFromString16(title, false, &word_starts.title_word_starts_); |
1297 word_starts_map_[iter->first] = word_starts; | 1208 word_starts_map_[entry.first] = std::move(word_starts); |
1298 } | 1209 } |
1299 } | 1210 } |
1300 return true; | 1211 return true; |
1301 } | 1212 } |
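When the cache has no word-starts section, the fallback loop recomputes the offsets from the cleaned-up URL and title via String16VectorFromString16(). The hypothetical ComputeWordStarts() below is a rough illustration of what a word-starts vector records; the real tokenizer works on base::string16 with ICU word-break rules, not this ASCII heuristic:

#include <cctype>
#include <cstddef>
#include <string>
#include <vector>

// Illustration only: collect the offset of each alphanumeric run, which is
// roughly what the url/title word starts record.
std::vector<size_t> ComputeWordStarts(const std::string& text) {
  std::vector<size_t> starts;
  bool in_word = false;
  for (size_t i = 0; i < text.size(); ++i) {
    bool is_word_char =
        std::isalnum(static_cast<unsigned char>(text[i])) != 0;
    if (is_word_char && !in_word)
      starts.push_back(i);
    in_word = is_word_char;
  }
  return starts;
}

int main() {
  // "www.example.com/path" yields starts at offsets 0, 4, 12 and 16.
  return ComputeWordStarts("www.example.com/path").size() == 4 ? 0 : 1;
}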
1302 | 1213 |
1303 // static | 1214 // static |
1304 bool URLIndexPrivateData::URLSchemeIsWhitelisted( | 1215 bool URLIndexPrivateData::URLSchemeIsWhitelisted( |
1305 const GURL& gurl, | 1216 const GURL& gurl, |
1306 const std::set<std::string>& whitelist) { | 1217 const std::set<std::string>& whitelist) { |
1307 return whitelist.find(gurl.scheme()) != whitelist.end(); | 1218 return whitelist.find(gurl.scheme()) != whitelist.end(); |
(...skipping 62 matching lines...) | |
1370 // First cut: typed count, visit count, recency. | 1281 // First cut: typed count, visit count, recency. |
1371 // TODO(mrossetti): This is too simplistic. Consider an approach which ranks | 1282 // TODO(mrossetti): This is too simplistic. Consider an approach which ranks |
1372 // recently visited (within the last 12/24 hours) as highly important. Get | 1283 // recently visited (within the last 12/24 hours) as highly important. Get |
1373 // input from mpearson. | 1284 // input from mpearson. |
1374 if (r1.typed_count() != r2.typed_count()) | 1285 if (r1.typed_count() != r2.typed_count()) |
1375 return (r1.typed_count() > r2.typed_count()); | 1286 return (r1.typed_count() > r2.typed_count()); |
1376 if (r1.visit_count() != r2.visit_count()) | 1287 if (r1.visit_count() != r2.visit_count()) |
1377 return (r1.visit_count() > r2.visit_count()); | 1288 return (r1.visit_count() > r2.visit_count()); |
1378 return (r1.last_visit() > r2.last_visit()); | 1289 return (r1.last_visit() > r2.last_visit()); |
1379 } | 1290 } |
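The ordering above ranks rows by typed count, then visit count, then last-visit time, all descending. A compilable sketch of the same three-key ordering on a hypothetical stripped-down row type, usable as a strict-weak-ordering predicate for std::sort:

#include <algorithm>
#include <cstdint>
#include <vector>

// Hypothetical stand-in for history::URLRow holding only the three keys
// the comparator above looks at.
struct Row {
  int typed_count;
  int visit_count;
  int64_t last_visit;
};

// Same ordering: typed count, then visit count, then recency, descending.
bool RankedHigher(const Row& r1, const Row& r2) {
  if (r1.typed_count != r2.typed_count)
    return r1.typed_count > r2.typed_count;
  if (r1.visit_count != r2.visit_count)
    return r1.visit_count > r2.visit_count;
  return r1.last_visit > r2.last_visit;
}

int main() {
  std::vector<Row> rows = {{1, 9, 50}, {3, 2, 10}, {3, 2, 20}};
  std::sort(rows.begin(), rows.end(), RankedHigher);
  // Highest typed count wins; ties fall through to visit count and recency.
  return rows.front().typed_count == 3 && rows.front().last_visit == 20 ? 0 : 1;
}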