components/omnibox/browser/url_index_private_data.cc - Issue 2337953002: Optimizing HQP using vectors manually. This is just a CL to be able

Unified Diff: components/omnibox/browser/url_index_private_data.cc

Issue 2337953002: Optimizing HQP using vectors manually. This is just a CL to be able (Closed)

Patch Set: Created 4 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: components/omnibox/browser/url_index_private_data.cc

diff --git a/components/omnibox/browser/url_index_private_data.cc b/components/omnibox/browser/url_index_private_data.cc

index 6ca56db8c245fcce787c720aeaa98eb36e788d4b..1f51789e1d2ea10aea0f26fa66f5efd86414ade0 100644

--- a/components/omnibox/browser/url_index_private_data.cc

+++ b/components/omnibox/browser/url_index_private_data.cc

@@ -522,12 +522,13 @@ URLIndexPrivateData::~URLIndexPrivateData() {}

HistoryIDSet URLIndexPrivateData::HistoryIDSetFromWords(

const String16Vector& unsorted_words) {

+ using HistoryIDMoveIterator = std::move_iterator<HistoryIDVector::iterator>;

// Break the terms down into individual terms (words), get the candidate

// set for each term, and intersect each to get a final candidate list.

// Note that a single 'term' from the user's perspective might be

// a string like "http://www.somewebsite.com" which, from our perspective,

// is four words: 'http', 'www', 'somewebsite', and 'com'.

- HistoryIDSet history_id_set;

+ HistoryIDVector history_id_set;

String16Vector words(unsorted_words);

// Sort the words into the longest first as such are likely to narrow down

// the results quicker. Also, single character words are the most expensive

@@ -536,33 +537,36 @@ HistoryIDSet URLIndexPrivateData::HistoryIDSetFromWords(

for (String16Vector::iterator iter = words.begin(); iter != words.end();

++iter) {

base::string16 uni_word = *iter;

- HistoryIDSet term_history_set = HistoryIDsForTerm(uni_word);

+ HistoryIDVector term_history_set = HistoryIDsForTerm(uni_word);

if (term_history_set.empty()) {

history_id_set.clear();

break;

}

if (iter == words.begin()) {

- history_id_set.swap(term_history_set);

+ history_id_set = std::move(term_history_set);

} else {

- HistoryIDSet new_history_id_set = base::STLSetIntersection<HistoryIDSet>(

- history_id_set, term_history_set);

- history_id_set.swap(new_history_id_set);

+ history_id_set.erase(

+ std::set_intersection(HistoryIDMoveIterator(term_history_set.begin()),

+ HistoryIDMoveIterator(term_history_set.end()),

+ history_id_set.begin(), history_id_set.end(),

+ history_id_set.begin()),

+ history_id_set.end());

}

- return history_id_set;

+ return HistoryIDSet(history_id_set.begin(), history_id_set.end());

}

-HistoryIDSet URLIndexPrivateData::HistoryIDsForTerm(

+HistoryIDVector URLIndexPrivateData::HistoryIDsForTerm(

const base::string16& term) {

if (term.empty())

- return HistoryIDSet();

+ return {};

// TODO(mrossetti): Consider optimizing for very common terms such as

// 'http[s]', 'www', 'com', etc. Or collect the top 100 more frequently

// occuring words in the user's searches.

size_t term_length = term.length();

- WordIDSet word_id_set;

+ WordIDVector word_id_set;

if (term_length > 1) {

// See if this term or a prefix thereof is present in the cache.

base::string16 term_lower = base::i18n::ToLower(term);

@@ -595,7 +599,7 @@ HistoryIDSet URLIndexPrivateData::HistoryIDsForTerm(

// as there will be no history results for the full term.

if (best_prefix->second.history_id_set_.empty()) {

search_term_cache_[term] = SearchTermCacheItem();

- return HistoryIDSet();

+ return {};

}

word_id_set = best_prefix->second.word_id_set_;

prefix_chars = Char16SetFromString16(best_prefix->first);

@@ -609,51 +613,54 @@ HistoryIDSet URLIndexPrivateData::HistoryIDsForTerm(

// Reduce the word set with any leftover, unprocessed characters.

if (!unique_chars.empty()) {

- WordIDSet leftover_set(WordIDSetForTermChars(unique_chars));

+ WordIDVector leftover_set(WordIDSetForTermChars(unique_chars));

// We might come up empty on the leftovers.

if (leftover_set.empty()) {

search_term_cache_[term] = SearchTermCacheItem();

- return HistoryIDSet();

+ return {};

}

// Or there may not have been a prefix from which to start.

if (prefix_chars.empty()) {

word_id_set.swap(leftover_set);

} else {

- WordIDSet new_word_id_set = base::STLSetIntersection<WordIDSet>(

- word_id_set, leftover_set);

- word_id_set.swap(new_word_id_set);

+ word_id_set.erase(

+ std::set_intersection(word_id_set.begin(), word_id_set.end(),

+ leftover_set.begin(), leftover_set.end(),

+ word_id_set.begin()),

+ word_id_set.end());

}

// We must filter the word list because the resulting word set surely

// contains words which do not have the search term as a proper subset.

- for (WordIDSet::iterator word_set_iter = word_id_set.begin();

- word_set_iter != word_id_set.end(); ) {

- if (word_list_[*word_set_iter].find(term) == base::string16::npos)

- word_id_set.erase(word_set_iter++);

- else

- ++word_set_iter;

- }

+ word_id_set.erase(std::remove_if(word_id_set.begin(), word_id_set.end(),

+ [this, &term](const WordID& id) {

+ return word_list_[id].find(term) ==

+ base::string16::npos;

+ }),

+ word_id_set.end());

} else {

word_id_set = WordIDSetForTermChars(Char16SetFromString16(term));

}

// If any words resulted then we can compose a set of history IDs by unioning

// the sets from each word.

- HistoryIDSet history_id_set;

- if (!word_id_set.empty()) {

- for (WordIDSet::iterator word_id_iter = word_id_set.begin();

- word_id_iter != word_id_set.end(); ++word_id_iter) {

- WordID word_id = *word_id_iter;

- WordIDHistoryMap::iterator word_iter = word_id_history_map_.find(word_id);

- if (word_iter != word_id_history_map_.end()) {

- HistoryIDSet& word_history_id_set(word_iter->second);

- history_id_set.insert(word_history_id_set.begin(),

- word_history_id_set.end());

- }

+ HistoryIDVector history_id_set;

+ for (const WordID word_id : word_id_set) {

+ WordIDHistoryMap::iterator word_iter = word_id_history_map_.find(word_id);

+ if (word_iter != word_id_history_map_.end()) {

+ HistoryIDSet& word_history_id_set(word_iter->second);

+ history_id_set.insert(history_id_set.end(), word_history_id_set.begin(),

+ word_history_id_set.end());

}

+ std::sort(history_id_set.begin(), history_id_set.end());

+ history_id_set.erase(

+ std::unique(history_id_set.begin(), history_id_set.end()),

+ history_id_set.end());

// Record a new cache entry for this word if the term is longer than

// a single character.

if (term_length > 1)

@@ -662,9 +669,9 @@ HistoryIDSet URLIndexPrivateData::HistoryIDsForTerm(

return history_id_set;

}

-WordIDSet URLIndexPrivateData::WordIDSetForTermChars(

+WordIDVector URLIndexPrivateData::WordIDSetForTermChars(

const Char16Set& term_chars) {

- WordIDSet word_id_set;

+ WordIDVector word_id_set;

for (Char16Set::const_iterator c_iter = term_chars.begin();

c_iter != term_chars.end(); ++c_iter) {

CharWordIDMap::iterator char_iter = char_word_map_.find(*c_iter);

@@ -683,12 +690,13 @@ WordIDSet URLIndexPrivateData::WordIDSetForTermChars(

if (c_iter == term_chars.begin()) {

// First character results becomes base set of results.

- word_id_set = char_word_id_set;

+ word_id_set = {char_word_id_set.begin(), char_word_id_set.end()};

} else {

- // Subsequent character results get intersected in.

- WordIDSet new_word_id_set = base::STLSetIntersection<WordIDSet>(

- word_id_set, char_word_id_set);

- word_id_set.swap(new_word_id_set);

+ word_id_set.erase(

+ std::set_intersection(word_id_set.begin(), word_id_set.end(),

+ char_word_id_set.begin(),

+ char_word_id_set.end(), word_id_set.begin()),

+ word_id_set.end());

}

return word_id_set;

@@ -1255,10 +1263,11 @@ bool URLIndexPrivateData::URLSchemeIsWhitelisted(

// SearchTermCacheItem ---------------------------------------------------------

URLIndexPrivateData::SearchTermCacheItem::SearchTermCacheItem(

- const WordIDSet& word_id_set,

- const HistoryIDSet& history_id_set)

- : word_id_set_(word_id_set), history_id_set_(history_id_set), used_(true) {

+ WordIDVector word_id_set,

+ HistoryIDVector history_id_set)

+ : word_id_set_(std::move(word_id_set)),

+ history_id_set_(std::move(history_id_set)),

+ used_(true) {}

URLIndexPrivateData::SearchTermCacheItem::SearchTermCacheItem() : used_(true) {

}

« no previous file with comments | « components/omnibox/browser/url_index_private_data.h ('k') | no next file » | no next file with comments »