Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1895)

Unified Diff: components/omnibox/browser/url_index_private_data.cc

Issue 2337953002: Optimizing HQP using vectors manually. This is just a CL to be able (Closed)
Patch Set: Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « components/omnibox/browser/url_index_private_data.h ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: components/omnibox/browser/url_index_private_data.cc
diff --git a/components/omnibox/browser/url_index_private_data.cc b/components/omnibox/browser/url_index_private_data.cc
index 6ca56db8c245fcce787c720aeaa98eb36e788d4b..1f51789e1d2ea10aea0f26fa66f5efd86414ade0 100644
--- a/components/omnibox/browser/url_index_private_data.cc
+++ b/components/omnibox/browser/url_index_private_data.cc
@@ -522,12 +522,13 @@ URLIndexPrivateData::~URLIndexPrivateData() {}
HistoryIDSet URLIndexPrivateData::HistoryIDSetFromWords(
const String16Vector& unsorted_words) {
+ using HistoryIDMoveIterator = std::move_iterator<HistoryIDVector::iterator>;
// Break the terms down into individual terms (words), get the candidate
// set for each term, and intersect each to get a final candidate list.
// Note that a single 'term' from the user's perspective might be
// a string like "http://www.somewebsite.com" which, from our perspective,
// is four words: 'http', 'www', 'somewebsite', and 'com'.
- HistoryIDSet history_id_set;
+ HistoryIDVector history_id_set;
String16Vector words(unsorted_words);
// Sort the words into the longest first as such are likely to narrow down
// the results quicker. Also, single character words are the most expensive
@@ -536,33 +537,36 @@ HistoryIDSet URLIndexPrivateData::HistoryIDSetFromWords(
for (String16Vector::iterator iter = words.begin(); iter != words.end();
++iter) {
base::string16 uni_word = *iter;
- HistoryIDSet term_history_set = HistoryIDsForTerm(uni_word);
+ HistoryIDVector term_history_set = HistoryIDsForTerm(uni_word);
if (term_history_set.empty()) {
history_id_set.clear();
break;
}
if (iter == words.begin()) {
- history_id_set.swap(term_history_set);
+ history_id_set = std::move(term_history_set);
} else {
- HistoryIDSet new_history_id_set = base::STLSetIntersection<HistoryIDSet>(
- history_id_set, term_history_set);
- history_id_set.swap(new_history_id_set);
+ history_id_set.erase(
+ std::set_intersection(HistoryIDMoveIterator(term_history_set.begin()),
+ HistoryIDMoveIterator(term_history_set.end()),
+ history_id_set.begin(), history_id_set.end(),
+ history_id_set.begin()),
+ history_id_set.end());
}
}
- return history_id_set;
+ return HistoryIDSet(history_id_set.begin(), history_id_set.end());
}
-HistoryIDSet URLIndexPrivateData::HistoryIDsForTerm(
+HistoryIDVector URLIndexPrivateData::HistoryIDsForTerm(
const base::string16& term) {
if (term.empty())
- return HistoryIDSet();
+ return {};
// TODO(mrossetti): Consider optimizing for very common terms such as
// 'http[s]', 'www', 'com', etc. Or collect the top 100 more frequently
// occuring words in the user's searches.
size_t term_length = term.length();
- WordIDSet word_id_set;
+ WordIDVector word_id_set;
if (term_length > 1) {
// See if this term or a prefix thereof is present in the cache.
base::string16 term_lower = base::i18n::ToLower(term);
@@ -595,7 +599,7 @@ HistoryIDSet URLIndexPrivateData::HistoryIDsForTerm(
// as there will be no history results for the full term.
if (best_prefix->second.history_id_set_.empty()) {
search_term_cache_[term] = SearchTermCacheItem();
- return HistoryIDSet();
+ return {};
}
word_id_set = best_prefix->second.word_id_set_;
prefix_chars = Char16SetFromString16(best_prefix->first);
@@ -609,51 +613,54 @@ HistoryIDSet URLIndexPrivateData::HistoryIDsForTerm(
// Reduce the word set with any leftover, unprocessed characters.
if (!unique_chars.empty()) {
- WordIDSet leftover_set(WordIDSetForTermChars(unique_chars));
+ WordIDVector leftover_set(WordIDSetForTermChars(unique_chars));
// We might come up empty on the leftovers.
if (leftover_set.empty()) {
search_term_cache_[term] = SearchTermCacheItem();
- return HistoryIDSet();
+ return {};
}
// Or there may not have been a prefix from which to start.
if (prefix_chars.empty()) {
word_id_set.swap(leftover_set);
} else {
- WordIDSet new_word_id_set = base::STLSetIntersection<WordIDSet>(
- word_id_set, leftover_set);
- word_id_set.swap(new_word_id_set);
+ word_id_set.erase(
+ std::set_intersection(word_id_set.begin(), word_id_set.end(),
+ leftover_set.begin(), leftover_set.end(),
+ word_id_set.begin()),
+ word_id_set.end());
}
}
// We must filter the word list because the resulting word set surely
// contains words which do not have the search term as a proper subset.
- for (WordIDSet::iterator word_set_iter = word_id_set.begin();
- word_set_iter != word_id_set.end(); ) {
- if (word_list_[*word_set_iter].find(term) == base::string16::npos)
- word_id_set.erase(word_set_iter++);
- else
- ++word_set_iter;
- }
+ word_id_set.erase(std::remove_if(word_id_set.begin(), word_id_set.end(),
+ [this, &term](const WordID& id) {
+ return word_list_[id].find(term) ==
+ base::string16::npos;
+ }),
+ word_id_set.end());
} else {
word_id_set = WordIDSetForTermChars(Char16SetFromString16(term));
}
// If any words resulted then we can compose a set of history IDs by unioning
// the sets from each word.
- HistoryIDSet history_id_set;
- if (!word_id_set.empty()) {
- for (WordIDSet::iterator word_id_iter = word_id_set.begin();
- word_id_iter != word_id_set.end(); ++word_id_iter) {
- WordID word_id = *word_id_iter;
- WordIDHistoryMap::iterator word_iter = word_id_history_map_.find(word_id);
- if (word_iter != word_id_history_map_.end()) {
- HistoryIDSet& word_history_id_set(word_iter->second);
- history_id_set.insert(word_history_id_set.begin(),
- word_history_id_set.end());
- }
+ HistoryIDVector history_id_set;
+
+ for (const WordID word_id : word_id_set) {
+ WordIDHistoryMap::iterator word_iter = word_id_history_map_.find(word_id);
+ if (word_iter != word_id_history_map_.end()) {
+ HistoryIDSet& word_history_id_set(word_iter->second);
+ history_id_set.insert(history_id_set.end(), word_history_id_set.begin(),
+ word_history_id_set.end());
}
}
+ std::sort(history_id_set.begin(), history_id_set.end());
+ history_id_set.erase(
+ std::unique(history_id_set.begin(), history_id_set.end()),
+ history_id_set.end());
+
// Record a new cache entry for this word if the term is longer than
// a single character.
if (term_length > 1)
@@ -662,9 +669,9 @@ HistoryIDSet URLIndexPrivateData::HistoryIDsForTerm(
return history_id_set;
}
-WordIDSet URLIndexPrivateData::WordIDSetForTermChars(
+WordIDVector URLIndexPrivateData::WordIDSetForTermChars(
const Char16Set& term_chars) {
- WordIDSet word_id_set;
+ WordIDVector word_id_set;
for (Char16Set::const_iterator c_iter = term_chars.begin();
c_iter != term_chars.end(); ++c_iter) {
CharWordIDMap::iterator char_iter = char_word_map_.find(*c_iter);
@@ -683,12 +690,13 @@ WordIDSet URLIndexPrivateData::WordIDSetForTermChars(
if (c_iter == term_chars.begin()) {
// First character results becomes base set of results.
- word_id_set = char_word_id_set;
+ word_id_set = {char_word_id_set.begin(), char_word_id_set.end()};
} else {
- // Subsequent character results get intersected in.
- WordIDSet new_word_id_set = base::STLSetIntersection<WordIDSet>(
- word_id_set, char_word_id_set);
- word_id_set.swap(new_word_id_set);
+ word_id_set.erase(
+ std::set_intersection(word_id_set.begin(), word_id_set.end(),
+ char_word_id_set.begin(),
+ char_word_id_set.end(), word_id_set.begin()),
+ word_id_set.end());
}
}
return word_id_set;
@@ -1255,10 +1263,11 @@ bool URLIndexPrivateData::URLSchemeIsWhitelisted(
// SearchTermCacheItem ---------------------------------------------------------
URLIndexPrivateData::SearchTermCacheItem::SearchTermCacheItem(
- const WordIDSet& word_id_set,
- const HistoryIDSet& history_id_set)
- : word_id_set_(word_id_set), history_id_set_(history_id_set), used_(true) {
-}
+ WordIDVector word_id_set,
+ HistoryIDVector history_id_set)
+ : word_id_set_(std::move(word_id_set)),
+ history_id_set_(std::move(history_id_set)),
+ used_(true) {}
URLIndexPrivateData::SearchTermCacheItem::SearchTermCacheItem() : used_(true) {
}
« no previous file with comments | « components/omnibox/browser/url_index_private_data.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698