Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(429)

Side by Side Diff: components/omnibox/browser/url_index_private_data.cc

Issue 2719403002: Switching to flat_sets in HQP. (Closed)
Patch Set: Created 3 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/omnibox/browser/url_index_private_data.h" 5 #include "components/omnibox/browser/url_index_private_data.h"
6 6
7 #include <stdint.h> 7 #include <stdint.h>
8 8
9 #include <functional> 9 #include <functional>
10 #include <iterator> 10 #include <iterator>
(...skipping 465 matching lines...) Expand 10 before | Expand all | Expand 10 after
476 HistoryIDVector history_ids; 476 HistoryIDVector history_ids;
477 String16Vector words(unsorted_words); 477 String16Vector words(unsorted_words);
478 // Sort the words into the longest first as such are likely to narrow down 478 // Sort the words into the longest first as such are likely to narrow down
479 // the results quicker. Also, single character words are the most expensive 479 // the results quicker. Also, single character words are the most expensive
480 // to process so save them for last. 480 // to process so save them for last.
481 std::sort(words.begin(), words.end(), LengthGreater); 481 std::sort(words.begin(), words.end(), LengthGreater);
482 482
483 // TODO(dyaroshev): write a generic algorithm(crbug.com/696167). 483 // TODO(dyaroshev): write a generic algorithm(crbug.com/696167).
484 for (String16Vector::iterator iter = words.begin(); iter != words.end(); 484 for (String16Vector::iterator iter = words.begin(); iter != words.end();
485 ++iter) { 485 ++iter) {
486 base::string16 uni_word = *iter; 486 HistoryIDSet term_history_set = HistoryIDsForTerm(*iter);
487 HistoryIDSet term_history_set = HistoryIDsForTerm(uni_word);
488 if (term_history_set.empty()) { 487 if (term_history_set.empty()) {
Peter Kasting 2017/03/02 22:54:33 Nit: No {}
489 history_ids.clear(); 488 return HistoryIDVector();
490 break;
491 } 489 }
492 if (iter == words.begin()) { 490 if (iter == words.begin()) {
493 history_ids = {term_history_set.begin(), term_history_set.end()}; 491 history_ids = {term_history_set.begin(), term_history_set.end()};
494 } else { 492 } else {
495 history_ids = base::STLSetIntersection<HistoryIDVector>(history_ids, 493 history_ids = base::STLSetIntersection<HistoryIDVector>(history_ids,
496 term_history_set); 494 term_history_set);
497 } 495 }
498 } 496 }
499 return history_ids; 497 return history_ids;
500 } 498 }
(...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after
581 return HistoryIDSet(); 579 return HistoryIDSet();
582 } 580 }
583 // Or there may not have been a prefix from which to start. 581 // Or there may not have been a prefix from which to start.
584 word_id_set = prefix_chars.empty() ? std::move(leftover_set) 582 word_id_set = prefix_chars.empty() ? std::move(leftover_set)
585 : base::STLSetIntersection<WordIDSet>( 583 : base::STLSetIntersection<WordIDSet>(
586 word_id_set, leftover_set); 584 word_id_set, leftover_set);
587 } 585 }
588 586
589 // We must filter the word list because the resulting word set surely 587 // We must filter the word list because the resulting word set surely
590 // contains words which do not have the search term as a proper subset. 588 // contains words which do not have the search term as a proper subset.
591 for (WordIDSet::iterator word_set_iter = word_id_set.begin(); 589 word_id_set.erase(std::remove_if(word_id_set.begin(), word_id_set.end(),
592 word_set_iter != word_id_set.end(); ) { 590 [&](WordID word_id) {
593 if (word_list_[*word_set_iter].find(term) == base::string16::npos) 591 return word_list_[word_id].find(term) ==
594 word_set_iter = word_id_set.erase(word_set_iter); 592 base::string16::npos;
595 else 593 }),
596 ++word_set_iter; 594 word_id_set.end());
597 }
598 } else { 595 } else {
599 word_id_set = WordIDSetForTermChars(Char16SetFromString16(term)); 596 word_id_set = WordIDSetForTermChars(Char16SetFromString16(term));
600 } 597 }
601 598
602 // If any words resulted then we can compose a set of history IDs by unioning 599 // If any words resulted then we can compose a set of history IDs by unioning
603 // the sets from each word. 600 // the sets from each word.
604 HistoryIDSet history_id_set; 601 // We use |buffer| because it's more efficient to collect everything and then
602 // construct a flat_set than to insert elements one by one.
603 HistoryIDVector buffer;
605 for (WordID word_id : word_id_set) { 604 for (WordID word_id : word_id_set) {
606 WordIDHistoryMap::iterator word_iter = word_id_history_map_.find(word_id); 605 WordIDHistoryMap::iterator word_iter = word_id_history_map_.find(word_id);
607 if (word_iter != word_id_history_map_.end()) { 606 if (word_iter != word_id_history_map_.end()) {
608 HistoryIDSet& word_history_id_set(word_iter->second); 607 HistoryIDSet& word_history_id_set(word_iter->second);
609 history_id_set.insert(word_history_id_set.begin(), 608 buffer.insert(buffer.end(), word_history_id_set.begin(),
610 word_history_id_set.end()); 609 word_history_id_set.end());
611 } 610 }
612 } 611 }
612 HistoryIDSet history_id_set(buffer.begin(), buffer.end());
613 613
614 // Record a new cache entry for this word if the term is longer than 614 // Record a new cache entry for this word if the term is longer than
615 // a single character. 615 // a single character.
616 if (term_length > 1) 616 if (term_length > 1)
617 search_term_cache_[term] = SearchTermCacheItem(word_id_set, history_id_set); 617 search_term_cache_[term] = SearchTermCacheItem(word_id_set, history_id_set);
618 618
619 return history_id_set; 619 return history_id_set;
620 } 620 }
621 621
622 WordIDSet URLIndexPrivateData::WordIDSetForTermChars( 622 WordIDSet URLIndexPrivateData::WordIDSetForTermChars(
623 const Char16Set& term_chars) { 623 const Char16Set& term_chars) {
624 // TODO(dyaroshev): write a generic algorithm(crbug.com/696167). 624 // TODO(dyaroshev): write a generic algorithm(crbug.com/696167).
625 625
626 WordIDSet word_id_set; 626 WordIDSet word_id_set;
627 for (Char16Set::const_iterator c_iter = term_chars.begin(); 627 for (Char16Set::const_iterator c_iter = term_chars.begin();
628 c_iter != term_chars.end(); ++c_iter) { 628 c_iter != term_chars.end(); ++c_iter) {
629 CharWordIDMap::iterator char_iter = char_word_map_.find(*c_iter); 629 CharWordIDMap::iterator char_iter = char_word_map_.find(*c_iter);
630 if (char_iter == char_word_map_.end()) { 630 // A character was not found so there are no matching results: bail.
631 // A character was not found so there are no matching results: bail. 631 if (char_iter == char_word_map_.end())
632 word_id_set.clear(); 632 return WordIDSet();
633 break; 633
634 } 634 const WordIDSet& char_word_id_set(char_iter->second);
635 WordIDSet& char_word_id_set(char_iter->second);
636 // It is possible for there to no longer be any words associated with 635 // It is possible for there to no longer be any words associated with
637 // a particular character. Give up in that case. 636 // a particular character. Give up in that case.
638 if (char_word_id_set.empty()) { 637 if (char_word_id_set.empty())
639 word_id_set.clear(); 638 return WordIDSet();
640 break;
641 }
642 639
643 if (c_iter == term_chars.begin()) { 640 if (c_iter == term_chars.begin()) {
644 // First character results becomes base set of results.
645 word_id_set = char_word_id_set; 641 word_id_set = char_word_id_set;
646 } else { 642 } else {
647 // Subsequent character results get intersected in.
648 word_id_set = 643 word_id_set =
649 base::STLSetIntersection<WordIDSet>(word_id_set, char_word_id_set); 644 base::STLSetIntersection<WordIDSet>(word_id_set, char_word_id_set);
650 } 645 }
651 } 646 }
652 return word_id_set; 647 return word_id_set;
653 } 648 }
654 649
655 void URLIndexPrivateData::HistoryIdsToScoredMatches( 650 void URLIndexPrivateData::HistoryIdsToScoredMatches(
656 HistoryIDVector history_ids, 651 HistoryIDVector history_ids,
657 const base::string16& lower_raw_string, 652 const base::string16& lower_raw_string,
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after
702 const history::URLRow& hist_item = hist_pos->second.url_row; 697 const history::URLRow& hist_item = hist_pos->second.url_row;
703 auto starts_pos = word_starts_map_.find(history_id); 698 auto starts_pos = word_starts_map_.find(history_id);
704 DCHECK(starts_pos != word_starts_map_.end()); 699 DCHECK(starts_pos != word_starts_map_.end());
705 ScoredHistoryMatch new_scored_match( 700 ScoredHistoryMatch new_scored_match(
706 hist_item, hist_pos->second.visits, lower_raw_string, lower_raw_terms, 701 hist_item, hist_pos->second.visits, lower_raw_string, lower_raw_terms,
707 lower_terms_to_word_starts_offsets, starts_pos->second, 702 lower_terms_to_word_starts_offsets, starts_pos->second,
708 bookmark_model && bookmark_model->IsBookmarked(hist_item.url()), 703 bookmark_model && bookmark_model->IsBookmarked(hist_item.url()),
709 num_matches, now); 704 num_matches, now);
710 // Filter new matches that ended up scoring 0. (These are usually matches 705 // Filter new matches that ended up scoring 0. (These are usually matches
711 // which didn't match the user's raw terms.) 706 // which didn't match the user's raw terms.)
712 if (new_scored_match.raw_score != 0) 707 if (new_scored_match.raw_score > 0)
713 scored_items->push_back(std::move(new_scored_match)); 708 scored_items->push_back(std::move(new_scored_match));
714 } 709 }
715 } 710 }
716 711
717 // static 712 // static
718 void URLIndexPrivateData::CalculateWordStartsOffsets( 713 void URLIndexPrivateData::CalculateWordStartsOffsets(
719 const String16Vector& lower_terms, 714 const String16Vector& lower_terms,
720 WordStarts* lower_terms_to_word_starts_offsets) { 715 WordStarts* lower_terms_to_word_starts_offsets) {
721 // Calculate offsets for each term. For instance, the offset for 716 // Calculate offsets for each term. For instance, the offset for
722 // ".net" should be 1, indicating that the actual word-part of the term 717 // ".net" should be 1, indicating that the actual word-part of the term
(...skipping 558 matching lines...) Expand 10 before | Expand all | Expand 10 after
1281 // First cut: typed count, visit count, recency. 1276 // First cut: typed count, visit count, recency.
1282 // TODO(mrossetti): This is too simplistic. Consider an approach which ranks 1277 // TODO(mrossetti): This is too simplistic. Consider an approach which ranks
1283 // recently visited (within the last 12/24 hours) as highly important. Get 1278 // recently visited (within the last 12/24 hours) as highly important. Get
1284 // input from mpearson. 1279 // input from mpearson.
1285 if (r1.typed_count() != r2.typed_count()) 1280 if (r1.typed_count() != r2.typed_count())
1286 return (r1.typed_count() > r2.typed_count()); 1281 return (r1.typed_count() > r2.typed_count());
1287 if (r1.visit_count() != r2.visit_count()) 1282 if (r1.visit_count() != r2.visit_count())
1288 return (r1.visit_count() > r2.visit_count()); 1283 return (r1.visit_count() > r2.visit_count());
1289 return (r1.last_visit() > r2.last_visit()); 1284 return (r1.last_visit() > r2.last_visit());
1290 } 1285 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698