| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "components/omnibox/browser/url_index_private_data.h" | 5 #include "components/omnibox/browser/url_index_private_data.h" |
| 6 | 6 |
| 7 #include <stdint.h> | 7 #include <stdint.h> |
| 8 | 8 |
| 9 #include <functional> | 9 #include <functional> |
| 10 #include <iterator> | 10 #include <iterator> |
| (...skipping 465 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 476 HistoryIDVector history_ids; | 476 HistoryIDVector history_ids; |
| 477 String16Vector words(unsorted_words); | 477 String16Vector words(unsorted_words); |
| 478 // Sort the words into the longest first as such are likely to narrow down | 478 // Sort the words into the longest first as such are likely to narrow down |
| 479 // the results quicker. Also, single character words are the most expensive | 479 // the results quicker. Also, single character words are the most expensive |
| 480 // to process so save them for last. | 480 // to process so save them for last. |
| 481 std::sort(words.begin(), words.end(), LengthGreater); | 481 std::sort(words.begin(), words.end(), LengthGreater); |
| 482 | 482 |
| 483 // TODO(dyaroshev): write a generic algorithm (crbug.com/696167). | 483 // TODO(dyaroshev): write a generic algorithm (crbug.com/696167). |
| 484 for (String16Vector::iterator iter = words.begin(); iter != words.end(); | 484 for (String16Vector::iterator iter = words.begin(); iter != words.end(); |
| 485 ++iter) { | 485 ++iter) { |
| 486 base::string16 uni_word = *iter; | 486 HistoryIDSet term_history_set = HistoryIDsForTerm(*iter); |
| 487 HistoryIDSet term_history_set = HistoryIDsForTerm(uni_word); | 487 if (term_history_set.empty()) |
| 488 if (term_history_set.empty()) { | 488 return HistoryIDVector(); |
| 489 history_ids.clear(); | 489 |
| 490 break; | |
| 491 } | |
| 492 if (iter == words.begin()) { | 490 if (iter == words.begin()) { |
| 493 history_ids = {term_history_set.begin(), term_history_set.end()}; | 491 history_ids = {term_history_set.begin(), term_history_set.end()}; |
| 494 } else { | 492 } else { |
| 495 history_ids = base::STLSetIntersection<HistoryIDVector>(history_ids, | 493 history_ids = base::STLSetIntersection<HistoryIDVector>(history_ids, |
| 496 term_history_set); | 494 term_history_set); |
| 497 } | 495 } |
| 498 } | 496 } |
| 499 return history_ids; | 497 return history_ids; |
| 500 } | 498 } |
| 501 | 499 |
| (...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 581 return HistoryIDSet(); | 579 return HistoryIDSet(); |
| 582 } | 580 } |
| 583 // Or there may not have been a prefix from which to start. | 581 // Or there may not have been a prefix from which to start. |
| 584 word_id_set = prefix_chars.empty() ? std::move(leftover_set) | 582 word_id_set = prefix_chars.empty() ? std::move(leftover_set) |
| 585 : base::STLSetIntersection<WordIDSet>( | 583 : base::STLSetIntersection<WordIDSet>( |
| 586 word_id_set, leftover_set); | 584 word_id_set, leftover_set); |
| 587 } | 585 } |
| 588 | 586 |
| 589 // We must filter the word list because the resulting word set surely | 587 // We must filter the word list because the resulting word set surely |
| 590 // contains words which do not have the search term as a proper substring. | 588 // contains words which do not have the search term as a proper substring. |
| 591 for (WordIDSet::iterator word_set_iter = word_id_set.begin(); | 589 word_id_set.erase(std::remove_if(word_id_set.begin(), word_id_set.end(), |
| 592 word_set_iter != word_id_set.end(); ) { | 590 [&](WordID word_id) { |
| 593 if (word_list_[*word_set_iter].find(term) == base::string16::npos) | 591 return word_list_[word_id].find(term) == |
| 594 word_set_iter = word_id_set.erase(word_set_iter); | 592 base::string16::npos; |
| 595 else | 593 }), |
| 596 ++word_set_iter; | 594 word_id_set.end()); |
| 597 } | |
| 598 } else { | 595 } else { |
| 599 word_id_set = WordIDSetForTermChars(Char16SetFromString16(term)); | 596 word_id_set = WordIDSetForTermChars(Char16SetFromString16(term)); |
| 600 } | 597 } |
| 601 | 598 |
| 602 // If any words resulted then we can compose a set of history IDs by unioning | 599 // If any words resulted then we can compose a set of history IDs by unioning |
| 603 // the sets from each word. | 600 // the sets from each word. |
| 604 HistoryIDSet history_id_set; | 601 // We use |buffer| because it's more efficient to collect everything and then |
| 602 // construct a flat_set than to insert elements one by one. |
| 603 HistoryIDVector buffer; |
| 605 for (WordID word_id : word_id_set) { | 604 for (WordID word_id : word_id_set) { |
| 606 WordIDHistoryMap::iterator word_iter = word_id_history_map_.find(word_id); | 605 WordIDHistoryMap::iterator word_iter = word_id_history_map_.find(word_id); |
| 607 if (word_iter != word_id_history_map_.end()) { | 606 if (word_iter != word_id_history_map_.end()) { |
| 608 HistoryIDSet& word_history_id_set(word_iter->second); | 607 HistoryIDSet& word_history_id_set(word_iter->second); |
| 609 history_id_set.insert(word_history_id_set.begin(), | 608 buffer.insert(buffer.end(), word_history_id_set.begin(), |
| 610 word_history_id_set.end()); | 609 word_history_id_set.end()); |
| 611 } | 610 } |
| 612 } | 611 } |
| 612 HistoryIDSet history_id_set(buffer.begin(), buffer.end()); |
| 613 | 613 |
| 614 // Record a new cache entry for this word if the term is longer than | 614 // Record a new cache entry for this word if the term is longer than |
| 615 // a single character. | 615 // a single character. |
| 616 if (term_length > 1) | 616 if (term_length > 1) |
| 617 search_term_cache_[term] = SearchTermCacheItem(word_id_set, history_id_set); | 617 search_term_cache_[term] = SearchTermCacheItem(word_id_set, history_id_set); |
| 618 | 618 |
| 619 return history_id_set; | 619 return history_id_set; |
| 620 } | 620 } |
| 621 | 621 |
| 622 WordIDSet URLIndexPrivateData::WordIDSetForTermChars( | 622 WordIDSet URLIndexPrivateData::WordIDSetForTermChars( |
| 623 const Char16Set& term_chars) { | 623 const Char16Set& term_chars) { |
| 624 // TODO(dyaroshev): write a generic algorithm (crbug.com/696167). | 624 // TODO(dyaroshev): write a generic algorithm (crbug.com/696167). |
| 625 | 625 |
| 626 WordIDSet word_id_set; | 626 WordIDSet word_id_set; |
| 627 for (Char16Set::const_iterator c_iter = term_chars.begin(); | 627 for (Char16Set::const_iterator c_iter = term_chars.begin(); |
| 628 c_iter != term_chars.end(); ++c_iter) { | 628 c_iter != term_chars.end(); ++c_iter) { |
| 629 CharWordIDMap::iterator char_iter = char_word_map_.find(*c_iter); | 629 CharWordIDMap::iterator char_iter = char_word_map_.find(*c_iter); |
| 630 if (char_iter == char_word_map_.end()) { | 630 // A character was not found so there are no matching results: bail. |
| 631 // A character was not found so there are no matching results: bail. | 631 if (char_iter == char_word_map_.end()) |
| 632 word_id_set.clear(); | 632 return WordIDSet(); |
| 633 break; | 633 |
| 634 } | 634 const WordIDSet& char_word_id_set(char_iter->second); |
| 635 WordIDSet& char_word_id_set(char_iter->second); | |
| 636 // It is possible for there to no longer be any words associated with | 635 // It is possible for there to no longer be any words associated with |
| 637 // a particular character. Give up in that case. | 636 // a particular character. Give up in that case. |
| 638 if (char_word_id_set.empty()) { | 637 if (char_word_id_set.empty()) |
| 639 word_id_set.clear(); | 638 return WordIDSet(); |
| 640 break; | |
| 641 } | |
| 642 | 639 |
| 643 if (c_iter == term_chars.begin()) { | 640 if (c_iter == term_chars.begin()) { |
| 644 // First character results becomes base set of results. | |
| 645 word_id_set = char_word_id_set; | 641 word_id_set = char_word_id_set; |
| 646 } else { | 642 } else { |
| 647 // Subsequent character results get intersected in. | |
| 648 word_id_set = | 643 word_id_set = |
| 649 base::STLSetIntersection<WordIDSet>(word_id_set, char_word_id_set); | 644 base::STLSetIntersection<WordIDSet>(word_id_set, char_word_id_set); |
| 650 } | 645 } |
| 651 } | 646 } |
| 652 return word_id_set; | 647 return word_id_set; |
| 653 } | 648 } |
| 654 | 649 |
| 655 void URLIndexPrivateData::HistoryIdsToScoredMatches( | 650 void URLIndexPrivateData::HistoryIdsToScoredMatches( |
| 656 HistoryIDVector history_ids, | 651 HistoryIDVector history_ids, |
| 657 const base::string16& lower_raw_string, | 652 const base::string16& lower_raw_string, |
| (...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 702 const history::URLRow& hist_item = hist_pos->second.url_row; | 697 const history::URLRow& hist_item = hist_pos->second.url_row; |
| 703 auto starts_pos = word_starts_map_.find(history_id); | 698 auto starts_pos = word_starts_map_.find(history_id); |
| 704 DCHECK(starts_pos != word_starts_map_.end()); | 699 DCHECK(starts_pos != word_starts_map_.end()); |
| 705 ScoredHistoryMatch new_scored_match( | 700 ScoredHistoryMatch new_scored_match( |
| 706 hist_item, hist_pos->second.visits, lower_raw_string, lower_raw_terms, | 701 hist_item, hist_pos->second.visits, lower_raw_string, lower_raw_terms, |
| 707 lower_terms_to_word_starts_offsets, starts_pos->second, | 702 lower_terms_to_word_starts_offsets, starts_pos->second, |
| 708 bookmark_model && bookmark_model->IsBookmarked(hist_item.url()), | 703 bookmark_model && bookmark_model->IsBookmarked(hist_item.url()), |
| 709 num_matches, now); | 704 num_matches, now); |
| 710 // Filter new matches that ended up scoring 0. (These are usually matches | 705 // Filter new matches that ended up scoring 0. (These are usually matches |
| 711 // which didn't match the user's raw terms.) | 706 // which didn't match the user's raw terms.) |
| 712 if (new_scored_match.raw_score != 0) | 707 if (new_scored_match.raw_score > 0) |
| 713 scored_items->push_back(std::move(new_scored_match)); | 708 scored_items->push_back(std::move(new_scored_match)); |
| 714 } | 709 } |
| 715 } | 710 } |
| 716 | 711 |
| 717 // static | 712 // static |
| 718 void URLIndexPrivateData::CalculateWordStartsOffsets( | 713 void URLIndexPrivateData::CalculateWordStartsOffsets( |
| 719 const String16Vector& lower_terms, | 714 const String16Vector& lower_terms, |
| 720 WordStarts* lower_terms_to_word_starts_offsets) { | 715 WordStarts* lower_terms_to_word_starts_offsets) { |
| 721 // Calculate offsets for each term. For instance, the offset for | 716 // Calculate offsets for each term. For instance, the offset for |
| 722 // ".net" should be 1, indicating that the actual word-part of the term | 717 // ".net" should be 1, indicating that the actual word-part of the term |
| (...skipping 558 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1281 // First cut: typed count, visit count, recency. | 1276 // First cut: typed count, visit count, recency. |
| 1282 // TODO(mrossetti): This is too simplistic. Consider an approach which ranks | 1277 // TODO(mrossetti): This is too simplistic. Consider an approach which ranks |
| 1283 // recently visited (within the last 12/24 hours) as highly important. Get | 1278 // recently visited (within the last 12/24 hours) as highly important. Get |
| 1284 // input from mpearson. | 1279 // input from mpearson. |
| 1285 if (r1.typed_count() != r2.typed_count()) | 1280 if (r1.typed_count() != r2.typed_count()) |
| 1286 return (r1.typed_count() > r2.typed_count()); | 1281 return (r1.typed_count() > r2.typed_count()); |
| 1287 if (r1.visit_count() != r2.visit_count()) | 1282 if (r1.visit_count() != r2.visit_count()) |
| 1288 return (r1.visit_count() > r2.visit_count()); | 1283 return (r1.visit_count() > r2.visit_count()); |
| 1289 return (r1.last_visit() > r2.last_visit()); | 1284 return (r1.last_visit() > r2.last_visit()); |
| 1290 } | 1285 } |
| OLD | NEW |