| Index: chrome/browser/history/in_memory_url_index.cc
|
| ===================================================================
|
| --- chrome/browser/history/in_memory_url_index.cc (revision 107888)
|
| +++ chrome/browser/history/in_memory_url_index.cc (working copy)
|
| @@ -11,7 +11,6 @@
|
| #include <numeric>
|
|
|
| #include "base/file_util.h"
|
| -#include "base/i18n/break_iterator.h"
|
| #include "base/i18n/case_conversion.h"
|
| #include "base/metrics/histogram.h"
|
| #include "base/string_util.h"
|
| @@ -59,23 +58,6 @@
|
| // each of these scores for each factor.
|
| const int kScoreRank[] = { 1425, 1200, 900, 400 };
|
|
|
| -ScoredHistoryMatch::ScoredHistoryMatch()
|
| - : raw_score(0),
|
| - can_inline(false) {}
|
| -
|
| -ScoredHistoryMatch::ScoredHistoryMatch(const URLRow& url_info)
|
| - : HistoryMatch(url_info, 0, false, false),
|
| - raw_score(0),
|
| - can_inline(false) {}
|
| -
|
| -ScoredHistoryMatch::~ScoredHistoryMatch() {}
|
| -
|
| -// Comparison function for sorting ScoredMatches by their scores.
|
| -bool ScoredHistoryMatch::MatchScoreGreater(const ScoredHistoryMatch& m1,
|
| - const ScoredHistoryMatch& m2) {
|
| - return m1.raw_score >= m2.raw_score;
|
| -}
|
| -
|
| InMemoryURLIndex::SearchTermCacheItem::SearchTermCacheItem(
|
| const WordIDSet& word_id_set,
|
| const HistoryIDSet& history_id_set)
|
| @@ -88,11 +70,6 @@
|
|
|
| InMemoryURLIndex::SearchTermCacheItem::~SearchTermCacheItem() {}
|
|
|
| -// Comparison function for sorting TermMatches by their offsets.
|
| -bool MatchOffsetLess(const TermMatch& m1, const TermMatch& m2) {
|
| - return m1.offset < m2.offset;
|
| -}
|
| -
|
| // Comparison function for sorting search terms by descending length.
|
| bool LengthGreater(const string16& string_a, const string16& string_b) {
|
| return string_a.length() > string_b.length();
|
| @@ -137,14 +114,14 @@
|
|
|
| InMemoryURLIndex::InMemoryURLIndex(const FilePath& history_dir)
|
| : history_dir_(history_dir),
|
| - history_item_count_(0),
|
| + private_data_(new URLIndexPrivateData),
|
| cached_at_shutdown_(false) {
|
| InMemoryURLIndex::InitializeSchemeWhitelist(&scheme_whitelist_);
|
| }
|
|
|
| // Called only by unit tests.
|
| InMemoryURLIndex::InMemoryURLIndex()
|
| - : history_item_count_(0),
|
| + : private_data_(new URLIndexPrivateData),
|
| cached_at_shutdown_(false) {
|
| InMemoryURLIndex::InitializeSchemeWhitelist(&scheme_whitelist_);
|
| }
|
| @@ -170,7 +147,7 @@
|
|
|
| // Indexing
|
|
|
| -bool InMemoryURLIndex::Init(history::URLDatabase* history_db,
|
| +bool InMemoryURLIndex::Init(URLDatabase* history_db,
|
| const std::string& languages) {
|
| // TODO(mrossetti): Register for profile/language change notifications.
|
| languages_ = languages;
|
| @@ -183,33 +160,47 @@
|
| cached_at_shutdown_ = true;
|
| }
|
|
|
| -bool InMemoryURLIndex::IndexRow(const URLRow& row) {
|
| +// Stores and indexes |row|; rows with non-whitelisted schemes are ignored.
|
| +void InMemoryURLIndex::IndexRow(const URLRow& row) {
|
| const GURL& gurl(row.url());
|
|
|
| // Index only URLs with a whitelisted scheme.
|
| if (!InMemoryURLIndex::URLSchemeIsWhitelisted(gurl))
|
| - return true;
|
| + return;
|
|
|
| + URLID row_id = row.id();
|
| + // Strip out username and password before saving and indexing.
|
| string16 url(net::FormatUrl(gurl, languages_,
|
| net::kFormatUrlOmitUsernamePassword,
|
| UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS,
|
| NULL, NULL, NULL));
|
|
|
| - HistoryID history_id = static_cast<HistoryID>(row.id());
|
| - DCHECK_LT(row.id(), std::numeric_limits<HistoryID>::max());
|
| + HistoryID history_id = static_cast<HistoryID>(row_id);
|
| + DCHECK_LT(row_id, std::numeric_limits<HistoryID>::max()); // Pre-cast id.
|
|
|
| // Add the row for quick lookup in the history info store.
|
| - URLRow new_row(GURL(url), row.id());
|
| + URLRow new_row(GURL(url), row_id);
|
| new_row.set_visit_count(row.visit_count());
|
| new_row.set_typed_count(row.typed_count());
|
| new_row.set_last_visit(row.last_visit());
|
| new_row.set_title(row.title());
|
| - history_info_map_[history_id] = new_row;
|
| + private_data_->history_info_map_[history_id] = new_row;
|
|
|
| + // Index the words contained in the URL and title of the row.
|
| + AddRowWordsToIndex(new_row);
|
| +}
|
| +
|
| +void InMemoryURLIndex::AddRowWordsToIndex(const URLRow& row) {
|
| + HistoryID history_id = static_cast<HistoryID>(row.id());
|
| // Split URL into individual, unique words then add in the title words.
|
| + const GURL& gurl(row.url());
|
| + string16 url(net::FormatUrl(gurl, languages_,
|
| + net::kFormatUrlOmitUsernamePassword,
|
| + UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS,
|
| + NULL, NULL, NULL));
|
| url = base::i18n::ToLower(url);
|
| - String16Set url_words = WordSetFromString16(url);
|
| - String16Set title_words = WordSetFromString16(row.title());
|
| + String16Set url_words = String16SetFromString16(url);
|
| + String16Set title_words = String16SetFromString16(row.title());
|
| String16Set words;
|
| std::set_union(url_words.begin(), url_words.end(),
|
| title_words.begin(), title_words.end(),
|
| @@ -218,10 +209,50 @@
|
| word_iter != words.end(); ++word_iter)
|
| AddWordToIndex(*word_iter, history_id);
|
|
|
| - ++history_item_count_;
|
| - return true;
|
| + search_term_cache_.clear(); // Invalidate the term cache.
|
| }
|
|
|
| +void InMemoryURLIndex::RemoveRowFromIndex(const URLRow& row) {
|
| +  // Unhook the row's words first, then drop its history info entry.
|
| +  RemoveRowWordsFromIndex(row);
|
| +  private_data_->history_info_map_.erase(static_cast<HistoryID>(row.id()));
|
| +}
|
| +
|
| +void InMemoryURLIndex::RemoveRowWordsFromIndex(const URLRow& row) {
|
| +  // Detaches |row| from every word it was indexed under; a word left with no
|
| +  // rows is retired and its WordID is put into available_words_.
|
| +  URLIndexPrivateData& data = *private_data_.get();
|
| +  const HistoryID history_id = static_cast<HistoryID>(row.id());
|
| +  const WordIDSet row_word_ids = data.history_id_word_map_[history_id];
|
| +  data.history_id_word_map_.erase(history_id);
|
| +
|
| +  for (WordIDSet::const_iterator id_iter = row_word_ids.begin();
|
| +       id_iter != row_word_ids.end(); ++id_iter) {
|
| +    const WordID word_id = *id_iter;
|
| +    HistoryIDSet& word_users = data.word_id_history_map_[word_id];
|
| +    word_users.erase(history_id);
|
| +    if (!word_users.empty())
|
| +      continue;  // Other rows still use this word.
|
| +
|
| +    // The word is now unused: detach it from each of its characters.
|
| +    const string16 word = data.word_list_[word_id];
|
| +    const Char16Set word_chars = Char16SetFromString16(word);
|
| +    for (Char16Set::const_iterator c_iter = word_chars.begin();
|
| +         c_iter != word_chars.end(); ++c_iter) {
|
| +      WordIDSet& char_words = data.char_word_map_[*c_iter];
|
| +      char_words.erase(word_id);
|
| +      if (char_words.empty())
|
| +        data.char_word_map_.erase(*c_iter);  // Character no longer in use.
|
| +    }
|
| +
|
| +    // Forget the word itself and mark its WordID as available.
|
| +    data.word_id_history_map_.erase(word_id);
|
| +    data.word_map_.erase(word);
|
| +    data.word_list_[word_id] = string16();
|
| +    data.available_words_.insert(word_id);
|
| +  }
|
| +}
|
| +
|
| bool InMemoryURLIndex::ReloadFromHistory(history::URLDatabase* history_db,
|
| bool clear_cache) {
|
| ClearPrivateData();
|
| @@ -236,10 +267,8 @@
|
| if (!history_db->InitURLEnumeratorForSignificant(&history_enum))
|
| return false;
|
| URLRow row;
|
| - while (history_enum.GetNextURL(&row)) {
|
| - if (!IndexRow(row))
|
| - return false;
|
| - }
|
| + while (history_enum.GetNextURL(&row))
|
| + IndexRow(row);
|
| UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime",
|
| base::TimeTicks::Now() - beginning_time);
|
| SaveToCacheFile();
|
| @@ -248,12 +277,7 @@
|
| }
|
|
|
| void InMemoryURLIndex::ClearPrivateData() {
|
| - history_item_count_ = 0;
|
| - word_list_.clear();
|
| - word_map_.clear();
|
| - char_word_map_.clear();
|
| - word_id_history_map_.clear();
|
| - history_info_map_.clear();
|
| + private_data_->Clear();
|
| search_term_cache_.clear();
|
| }
|
|
|
| @@ -289,10 +313,13 @@
|
|
|
| UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexRestoreCacheTime",
|
| base::TimeTicks::Now() - beginning_time);
|
| - UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems", history_item_count_);
|
| + UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems",
|
| + private_data_->history_id_word_map_.size());
|
| UMA_HISTOGRAM_COUNTS("History.InMemoryURLCacheSize", data.size());
|
| - UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", word_map_.size());
|
| - UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", char_word_map_.size());
|
| + UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords",
|
| + private_data_->word_map_.size());
|
| + UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars",
|
| + private_data_->char_word_map_.size());
|
| return true;
|
| }
|
|
|
| @@ -327,29 +354,39 @@
|
| // indexed and it qualifies then it gets indexed. If it is already
|
| // indexed and still qualifies then it gets updated, otherwise it
|
| // is deleted from the index.
|
| - HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id);
|
| - if (row_pos == history_info_map_.end()) {
|
| + HistoryInfoMap::iterator row_pos =
|
| + private_data_->history_info_map_.find(row_id);
|
| + if (row_pos == private_data_->history_info_map_.end()) {
|
| // This new row should be indexed if it qualifies.
|
| - if (RowQualifiesAsSignificant(row, base::Time()))
|
| - IndexRow(row);
|
| + URLRow new_row(row);
|
| + new_row.set_id(row_id);
|
| + if (RowQualifiesAsSignificant(new_row, base::Time()))
|
| + IndexRow(new_row);
|
| } else if (RowQualifiesAsSignificant(row, base::Time())) {
|
| // This indexed row still qualifies and will be re-indexed.
|
| // The url won't have changed but the title, visit count, etc.
|
| // might have changed.
|
| - URLRow& old_row = row_pos->second;
|
| - old_row.set_visit_count(row.visit_count());
|
| - old_row.set_typed_count(row.typed_count());
|
| - old_row.set_last_visit(row.last_visit());
|
| - // TODO(mrossetti): When we start indexing the title the next line
|
| - // will need attention.
|
| - old_row.set_title(row.title());
|
| + URLRow& updated_row = row_pos->second;
|
| + updated_row.set_visit_count(row.visit_count());
|
| + updated_row.set_typed_count(row.typed_count());
|
| + updated_row.set_last_visit(row.last_visit());
|
| + // While the URL is guaranteed to remain stable, the title may have changed.
|
| + // If so, then we need to update the index with the changed words.
|
| + if (updated_row.title() != row.title()) {
|
| + // Clear all words associated with this row and re-index both the
|
| + // URL and title.
|
| + RemoveRowWordsFromIndex(updated_row);
|
| + updated_row.set_title(row.title());
|
| + AddRowWordsToIndex(updated_row);
|
| + }
|
| } else {
|
| - // This indexed row no longer qualifies and will be de-indexed.
|
| - history_info_map_.erase(row_id);
|
| + // This indexed row no longer qualifies and will be de-indexed by
|
| + // clearing all words associated with this row.
|
| + URLRow& removed_row = row_pos->second;
|
| + RemoveRowFromIndex(removed_row);
|
| }
|
| // This invalidates the cache.
|
| search_term_cache_.clear();
|
| - // TODO(mrossetti): Record this transaction in the cache.
|
| }
|
|
|
| void InMemoryURLIndex::DeleteURL(URLID row_id) {
|
| @@ -358,10 +395,9 @@
|
| // hits against this row until that map is rebuilt, but since the
|
| // history_info_map_ no longer references the row no erroneous results
|
| // will propagate to the user.
|
| - history_info_map_.erase(row_id);
|
| + private_data_->history_info_map_.erase(row_id);
|
| // This invalidates the word cache.
|
| search_term_cache_.clear();
|
| - // TODO(mrossetti): Record this transaction in the cache.
|
| }
|
|
|
| // Searching
|
| @@ -369,6 +405,12 @@
|
| ScoredHistoryMatches InMemoryURLIndex::HistoryItemsForTerms(
|
| const String16Vector& terms) {
|
| ScoredHistoryMatches scored_items;
|
| +
|
| + // Do nothing if we have indexed no words (probably because we've not been
|
| + // initialized yet).
|
| + if (private_data_->word_list_.empty())
|
| + return scored_items;
|
| +
|
| if (!terms.empty()) {
|
| // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep
|
| // approach.
|
| @@ -426,7 +468,7 @@
|
| iter->second.used_ = false;
|
| }
|
|
|
| -InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDSetFromWords(
|
| +HistoryIDSet InMemoryURLIndex::HistoryIDSetFromWords(
|
| const string16& uni_string) {
|
| // Break the terms down into individual terms (words), get the candidate
|
| // set for each term, and intersect each to get a final candidate list.
|
| @@ -434,7 +476,7 @@
|
| // a string like "http://www.somewebsite.com" which, from our perspective,
|
| // is four words: 'http', 'www', 'somewebsite', and 'com'.
|
| HistoryIDSet history_id_set;
|
| - String16Vector terms = WordVectorFromString16(uni_string, true);
|
| + String16Vector terms = String16VectorFromString16(uni_string, true);
|
| // Sort the terms into the longest first as such are likely to narrow down
|
| // the results quicker. Also, single character terms are the most expensive
|
| // to process so save them for last.
|
| @@ -461,7 +503,7 @@
|
| return history_id_set;
|
| }
|
|
|
| -InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm(
|
| +HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm(
|
| const string16& term) {
|
| if (term.empty())
|
| return HistoryIDSet();
|
| @@ -471,7 +513,7 @@
|
| // occuring words in the user's searches.
|
|
|
| size_t term_length = term.length();
|
| - InMemoryURLIndex::WordIDSet word_id_set;
|
| + WordIDSet word_id_set;
|
| if (term_length > 1) {
|
| // See if this term or a prefix thereof is present in the cache.
|
| SearchTermCacheMap::iterator best_prefix(search_term_cache_.end());
|
| @@ -517,7 +559,8 @@
|
|
|
| // Reduce the word set with any leftover, unprocessed characters.
|
| if (!unique_chars.empty()) {
|
| - WordIDSet leftover_set(WordIDSetForTermChars(unique_chars));
|
| + WordIDSet leftover_set(
|
| + private_data_->WordIDSetForTermChars(unique_chars));
|
| // We might come up empty on the leftovers.
|
| if (leftover_set.empty()) {
|
| search_term_cache_[term] = SearchTermCacheItem();
|
| @@ -540,13 +583,15 @@
|
| // contains words which do not have the search term as a proper subset.
|
| for (WordIDSet::iterator word_set_iter = word_id_set.begin();
|
| word_set_iter != word_id_set.end(); ) {
|
| - if (word_list_[*word_set_iter].find(term) == string16::npos)
|
| + if (private_data_->word_list_[*word_set_iter].find(term) ==
|
| + string16::npos)
|
| word_id_set.erase(word_set_iter++);
|
| else
|
| ++word_set_iter;
|
| }
|
| } else {
|
| - word_id_set = WordIDSetForTermChars(Char16SetFromString16(term));
|
| + word_id_set =
|
| + private_data_->WordIDSetForTermChars(Char16SetFromString16(term));
|
| }
|
|
|
| // If any words resulted then we can compose a set of history IDs by unioning
|
| @@ -556,8 +601,9 @@
|
| for (WordIDSet::iterator word_id_iter = word_id_set.begin();
|
| word_id_iter != word_id_set.end(); ++word_id_iter) {
|
| WordID word_id = *word_id_iter;
|
| - WordIDHistoryMap::iterator word_iter = word_id_history_map_.find(word_id);
|
| - if (word_iter != word_id_history_map_.end()) {
|
| + WordIDHistoryMap::iterator word_iter =
|
| + private_data_->word_id_history_map_.find(word_id);
|
| + if (word_iter != private_data_->word_id_history_map_.end()) {
|
| HistoryIDSet& word_history_id_set(word_iter->second);
|
| history_id_set.insert(word_history_id_set.begin(),
|
| word_history_id_set.end());
|
| @@ -575,85 +621,53 @@
|
|
|
| // Utility Functions
|
|
|
| -// static
|
| -InMemoryURLIndex::String16Set InMemoryURLIndex::WordSetFromString16(
|
| - const string16& uni_string) {
|
| - const size_t kMaxWordLength = 64;
|
| - String16Vector words = WordVectorFromString16(uni_string, false);
|
| - String16Set word_set;
|
| - for (String16Vector::const_iterator iter = words.begin(); iter != words.end();
|
| - ++iter)
|
| - word_set.insert(base::i18n::ToLower(*iter).substr(0, kMaxWordLength));
|
| - return word_set;
|
| -}
|
| -
|
| -// static
|
| -InMemoryURLIndex::String16Vector InMemoryURLIndex::WordVectorFromString16(
|
| - const string16& uni_string,
|
| - bool break_on_space) {
|
| - base::i18n::BreakIterator iter(
|
| - uni_string,
|
| - break_on_space ? base::i18n::BreakIterator::BREAK_SPACE
|
| - : base::i18n::BreakIterator::BREAK_WORD);
|
| - String16Vector words;
|
| - if (!iter.Init())
|
| - return words;
|
| - while (iter.Advance()) {
|
| - if (break_on_space || iter.IsWord()) {
|
| - string16 word = iter.GetString();
|
| - if (break_on_space)
|
| - TrimWhitespace(word, TRIM_ALL, &word);
|
| - if (!word.empty())
|
| - words.push_back(word);
|
| - }
|
| - }
|
| - return words;
|
| -}
|
| -
|
| -// static
|
| -InMemoryURLIndex::Char16Set InMemoryURLIndex::Char16SetFromString16(
|
| - const string16& term) {
|
| - Char16Set characters;
|
| - for (string16::const_iterator iter = term.begin(); iter != term.end();
|
| - ++iter)
|
| - characters.insert(*iter);
|
| - return characters;
|
| -}
|
| -
|
| void InMemoryURLIndex::AddWordToIndex(const string16& term,
|
| HistoryID history_id) {
|
| - WordMap::iterator word_pos = word_map_.find(term);
|
| - if (word_pos != word_map_.end())
|
| + WordMap::iterator word_pos = private_data_->word_map_.find(term);
|
| + if (word_pos != private_data_->word_map_.end())
|
| UpdateWordHistory(word_pos->second, history_id);
|
| else
|
| AddWordHistory(term, history_id);
|
| }
|
|
|
| void InMemoryURLIndex::UpdateWordHistory(WordID word_id, HistoryID history_id) {
|
| - WordIDHistoryMap::iterator history_pos = word_id_history_map_.find(word_id);
|
| - DCHECK(history_pos != word_id_history_map_.end());
|
| - HistoryIDSet& history_id_set(history_pos->second);
|
| - history_id_set.insert(history_id);
|
| +  // Add |history_id| to the word's set, then record the reverse association.
|
| +  WordIDHistoryMap& word_histories = private_data_->word_id_history_map_;
|
| +  WordIDHistoryMap::iterator entry = word_histories.find(word_id);
|
| +  DCHECK(entry != word_histories.end());
|
| +  entry->second.insert(history_id);
|
| +  private_data_->AddToHistoryIDWordMap(history_id, word_id);
|
| }
|
|
|
| // Add a new word to the word list and the word map, and then create a
|
| // new entry in the word/history map.
|
| void InMemoryURLIndex::AddWordHistory(const string16& term,
|
| HistoryID history_id) {
|
| - word_list_.push_back(term);
|
| - WordID word_id = word_list_.size() - 1;
|
| - word_map_[term] = word_id;
|
| + URLIndexPrivateData& private_data(*(private_data_.get()));
|
| + WordID word_id = private_data.word_list_.size();
|
| + if (private_data.available_words_.empty()) {
|
| + private_data.word_list_.push_back(term);
|
| + } else {
|
| + word_id = *(private_data.available_words_.begin());
|
| + private_data.word_list_[word_id] = term;
|
| + private_data.available_words_.erase(word_id);
|
| + }
|
| + private_data.word_map_[term] = word_id;
|
| +
|
| HistoryIDSet history_id_set;
|
| history_id_set.insert(history_id);
|
| - word_id_history_map_[word_id] = history_id_set;
|
| + private_data.word_id_history_map_[word_id] = history_id_set;
|
| + private_data.AddToHistoryIDWordMap(history_id, word_id);
|
| +
|
| // For each character in the newly added word (i.e. a word that is not
|
| // already in the word index), add the word to the character index.
|
| Char16Set characters = Char16SetFromString16(term);
|
| for (Char16Set::iterator uni_char_iter = characters.begin();
|
| uni_char_iter != characters.end(); ++uni_char_iter) {
|
| char16 uni_char = *uni_char_iter;
|
| - CharWordIDMap::iterator char_iter = char_word_map_.find(uni_char);
|
| - if (char_iter != char_word_map_.end()) {
|
| + CharWordIDMap::iterator char_iter =
|
| + private_data.char_word_map_.find(uni_char);
|
| + if (char_iter != private_data.char_word_map_.end()) {
|
| // Update existing entry in the char/word index.
|
| WordIDSet& word_id_set(char_iter->second);
|
| word_id_set.insert(word_id);
|
| @@ -661,108 +675,13 @@
|
| // Create a new entry in the char/word index.
|
| WordIDSet word_id_set;
|
| word_id_set.insert(word_id);
|
| - char_word_map_[uni_char] = word_id_set;
|
| + private_data.char_word_map_[uni_char] = word_id_set;
|
| }
|
| }
|
| }
|
|
|
| -InMemoryURLIndex::WordIDSet InMemoryURLIndex::WordIDSetForTermChars(
|
| - const Char16Set& term_chars) {
|
| - WordIDSet word_id_set;
|
| - for (Char16Set::const_iterator c_iter = term_chars.begin();
|
| - c_iter != term_chars.end(); ++c_iter) {
|
| - CharWordIDMap::iterator char_iter = char_word_map_.find(*c_iter);
|
| - if (char_iter == char_word_map_.end()) {
|
| - // A character was not found so there are no matching results: bail.
|
| - word_id_set.clear();
|
| - break;
|
| - }
|
| - WordIDSet& char_word_id_set(char_iter->second);
|
| - // It is possible for there to no longer be any words associated with
|
| - // a particular character. Give up in that case.
|
| - if (char_word_id_set.empty()) {
|
| - word_id_set.clear();
|
| - break;
|
| - }
|
| -
|
| - if (c_iter == term_chars.begin()) {
|
| - // First character results becomes base set of results.
|
| - word_id_set = char_word_id_set;
|
| - } else {
|
| - // Subsequent character results get intersected in.
|
| - WordIDSet new_word_id_set;
|
| - std::set_intersection(word_id_set.begin(), word_id_set.end(),
|
| - char_word_id_set.begin(), char_word_id_set.end(),
|
| - std::inserter(new_word_id_set,
|
| - new_word_id_set.begin()));
|
| - word_id_set.swap(new_word_id_set);
|
| - }
|
| - }
|
| - return word_id_set;
|
| -}
|
| -
|
| // static
|
| -TermMatches InMemoryURLIndex::MatchTermInString(const string16& term,
|
| - const string16& string,
|
| - int term_num) {
|
| - const size_t kMaxCompareLength = 2048;
|
| - const string16& short_string = (string.length() > kMaxCompareLength) ?
|
| - string.substr(0, kMaxCompareLength) : string;
|
| - TermMatches matches;
|
| - for (size_t location = short_string.find(term); location != string16::npos;
|
| - location = short_string.find(term, location + 1)) {
|
| - matches.push_back(TermMatch(term_num, location, term.length()));
|
| - }
|
| - return matches;
|
| -}
|
| -
|
| -// static
|
| -TermMatches InMemoryURLIndex::SortAndDeoverlap(const TermMatches& matches) {
|
| - if (matches.empty())
|
| - return matches;
|
| - TermMatches sorted_matches = matches;
|
| - std::sort(sorted_matches.begin(), sorted_matches.end(), MatchOffsetLess);
|
| - TermMatches clean_matches;
|
| - TermMatch last_match = sorted_matches[0];
|
| - clean_matches.push_back(last_match);
|
| - for (TermMatches::const_iterator iter = sorted_matches.begin() + 1;
|
| - iter != sorted_matches.end(); ++iter) {
|
| - if (iter->offset >= last_match.offset + last_match.length) {
|
| - last_match = *iter;
|
| - clean_matches.push_back(last_match);
|
| - }
|
| - }
|
| - return clean_matches;
|
| -}
|
| -
|
| -// static
|
| -std::vector<size_t> InMemoryURLIndex::OffsetsFromTermMatches(
|
| - const TermMatches& matches) {
|
| - std::vector<size_t> offsets;
|
| - for (TermMatches::const_iterator i = matches.begin(); i != matches.end(); ++i)
|
| - offsets.push_back(i->offset);
|
| - return offsets;
|
| -}
|
| -
|
| -// static
|
| -TermMatches InMemoryURLIndex::ReplaceOffsetsInTermMatches(
|
| - const TermMatches& matches,
|
| - const std::vector<size_t>& offsets) {
|
| - DCHECK_EQ(matches.size(), offsets.size());
|
| - TermMatches new_matches;
|
| - std::vector<size_t>::const_iterator offset_iter = offsets.begin();
|
| - for (TermMatches::const_iterator term_iter = matches.begin();
|
| - term_iter != matches.end(); ++term_iter, ++offset_iter) {
|
| - if (*offset_iter != string16::npos) {
|
| - TermMatch new_match(*term_iter);
|
| - new_match.offset = *offset_iter;
|
| - new_matches.push_back(new_match);
|
| - }
|
| - }
|
| - return new_matches;
|
| -}
|
| -
|
| -// static
|
| +// TODO(mrossetti): This can be made a ctor for ScoredHistoryMatch.
|
| ScoredHistoryMatch InMemoryURLIndex::ScoredMatchForURL(
|
| const URLRow& row,
|
| const String16Vector& terms) {
|
| @@ -791,8 +710,8 @@
|
| }
|
|
|
| // Sort matches by offset and eliminate any which overlap.
|
| - match.url_matches = SortAndDeoverlap(match.url_matches);
|
| - match.title_matches = SortAndDeoverlap(match.title_matches);
|
| + match.url_matches = SortAndDeoverlapMatches(match.url_matches);
|
| + match.title_matches = SortAndDeoverlapMatches(match.title_matches);
|
|
|
| // We should not (currently) inline autocomplete a result unless both of the
|
| // following are true:
|
| @@ -906,19 +825,17 @@
|
| const InMemoryURLIndex& index,
|
| const String16Vector& lower_terms)
|
| : index_(index),
|
| - lower_terms_(lower_terms) {
|
| -}
|
| + lower_terms_(lower_terms) {}
|
|
|
| InMemoryURLIndex::AddHistoryMatch::~AddHistoryMatch() {}
|
|
|
| -void InMemoryURLIndex::AddHistoryMatch::operator()(
|
| - const InMemoryURLIndex::HistoryID history_id) {
|
| +void InMemoryURLIndex::AddHistoryMatch::operator()(const HistoryID history_id) {
|
| HistoryInfoMap::const_iterator hist_pos =
|
| - index_.history_info_map_.find(history_id);
|
| + index_.private_data_->history_info_map_.find(history_id);
|
| // Note that a history_id may be present in the word_id_history_map_ yet not
|
| // be found in the history_info_map_. This occurs when an item has been
|
| // deleted by the user or the item no longer qualifies as a quick result.
|
| - if (hist_pos != index_.history_info_map_.end()) {
|
| + if (hist_pos != index_.private_data_->history_info_map_.end()) {
|
| const URLRow& hist_item = hist_pos->second;
|
| ScoredHistoryMatch match(ScoredMatchForURL(hist_item, lower_terms_));
|
| if (match.raw_score > 0)
|
| @@ -940,7 +857,9 @@
|
| void InMemoryURLIndex::SavePrivateData(InMemoryURLIndexCacheItem* cache) const {
|
| DCHECK(cache);
|
| cache->set_timestamp(base::Time::Now().ToInternalValue());
|
| - cache->set_history_item_count(history_item_count_);
|
| + // history_item_count_ is no longer tracked. Rather than change the protobuf
|
| + // definition, write a placeholder; this goes away with the switch to SQLite.
|
| + cache->set_history_item_count(0);
|
| SaveWordList(cache);
|
| SaveWordMap(cache);
|
| SaveCharWordMap(cache);
|
| @@ -951,20 +870,18 @@
|
| bool InMemoryURLIndex::RestorePrivateData(
|
| const InMemoryURLIndexCacheItem& cache) {
|
| last_saved_ = base::Time::FromInternalValue(cache.timestamp());
|
| - history_item_count_ = cache.history_item_count();
|
| - return (history_item_count_ == 0) || (RestoreWordList(cache) &&
|
| - RestoreWordMap(cache) && RestoreCharWordMap(cache) &&
|
| - RestoreWordIDHistoryMap(cache) && RestoreHistoryInfoMap(cache));
|
| + return RestoreWordList(cache) && RestoreWordMap(cache) &&
|
| + RestoreCharWordMap(cache) && RestoreWordIDHistoryMap(cache) &&
|
| + RestoreHistoryInfoMap(cache);
|
| }
|
|
|
| -
|
| void InMemoryURLIndex::SaveWordList(InMemoryURLIndexCacheItem* cache) const {
|
| - if (word_list_.empty())
|
| + if (private_data_->word_list_.empty())
|
| return;
|
| WordListItem* list_item = cache->mutable_word_list();
|
| - list_item->set_word_count(word_list_.size());
|
| - for (String16Vector::const_iterator iter = word_list_.begin();
|
| - iter != word_list_.end(); ++iter)
|
| + list_item->set_word_count(private_data_->word_list_.size());
|
| + for (String16Vector::const_iterator iter = private_data_->word_list_.begin();
|
| + iter != private_data_->word_list_.end(); ++iter)
|
| list_item->add_word(UTF16ToUTF8(*iter));
|
| }
|
|
|
| @@ -979,17 +896,17 @@
|
| const RepeatedPtrField<std::string>& words(list_item.word());
|
| for (RepeatedPtrField<std::string>::const_iterator iter = words.begin();
|
| iter != words.end(); ++iter)
|
| - word_list_.push_back(UTF8ToUTF16(*iter));
|
| + private_data_->word_list_.push_back(UTF8ToUTF16(*iter));
|
| return true;
|
| }
|
|
|
| void InMemoryURLIndex::SaveWordMap(InMemoryURLIndexCacheItem* cache) const {
|
| - if (word_map_.empty())
|
| + if (private_data_->word_map_.empty())
|
| return;
|
| WordMapItem* map_item = cache->mutable_word_map();
|
| - map_item->set_item_count(word_map_.size());
|
| - for (WordMap::const_iterator iter = word_map_.begin();
|
| - iter != word_map_.end(); ++iter) {
|
| + map_item->set_item_count(private_data_->word_map_.size());
|
| + for (WordMap::const_iterator iter = private_data_->word_map_.begin();
|
| + iter != private_data_->word_map_.end(); ++iter) {
|
| WordMapEntry* map_entry = map_item->add_word_map_entry();
|
| map_entry->set_word(UTF16ToUTF8(iter->first));
|
| map_entry->set_word_id(iter->second);
|
| @@ -1007,17 +924,18 @@
|
| const RepeatedPtrField<WordMapEntry>& entries(list_item.word_map_entry());
|
| for (RepeatedPtrField<WordMapEntry>::const_iterator iter = entries.begin();
|
| iter != entries.end(); ++iter)
|
| - word_map_[UTF8ToUTF16(iter->word())] = iter->word_id();
|
| + private_data_->word_map_[UTF8ToUTF16(iter->word())] = iter->word_id();
|
| return true;
|
| }
|
|
|
| void InMemoryURLIndex::SaveCharWordMap(InMemoryURLIndexCacheItem* cache) const {
|
| - if (char_word_map_.empty())
|
| + if (private_data_->char_word_map_.empty())
|
| return;
|
| CharWordMapItem* map_item = cache->mutable_char_word_map();
|
| - map_item->set_item_count(char_word_map_.size());
|
| - for (CharWordIDMap::const_iterator iter = char_word_map_.begin();
|
| - iter != char_word_map_.end(); ++iter) {
|
| + map_item->set_item_count(private_data_->char_word_map_.size());
|
| + for (CharWordIDMap::const_iterator iter =
|
| + private_data_->char_word_map_.begin();
|
| + iter != private_data_->char_word_map_.end(); ++iter) {
|
| CharWordMapEntry* map_entry = map_item->add_char_word_map_entry();
|
| map_entry->set_char_16(iter->first);
|
| const WordIDSet& word_id_set(iter->second);
|
| @@ -1051,19 +969,20 @@
|
| for (RepeatedField<int32>::const_iterator jiter = word_ids.begin();
|
| jiter != word_ids.end(); ++jiter)
|
| word_id_set.insert(*jiter);
|
| - char_word_map_[uni_char] = word_id_set;
|
| + private_data_->char_word_map_[uni_char] = word_id_set;
|
| }
|
| return true;
|
| }
|
|
|
| void InMemoryURLIndex::SaveWordIDHistoryMap(InMemoryURLIndexCacheItem* cache)
|
| const {
|
| - if (word_id_history_map_.empty())
|
| + if (private_data_->word_id_history_map_.empty())
|
| return;
|
| WordIDHistoryMapItem* map_item = cache->mutable_word_id_history_map();
|
| - map_item->set_item_count(word_id_history_map_.size());
|
| - for (WordIDHistoryMap::const_iterator iter = word_id_history_map_.begin();
|
| - iter != word_id_history_map_.end(); ++iter) {
|
| + map_item->set_item_count(private_data_->word_id_history_map_.size());
|
| + for (WordIDHistoryMap::const_iterator iter =
|
| + private_data_->word_id_history_map_.begin();
|
| + iter != private_data_->word_id_history_map_.end(); ++iter) {
|
| WordIDHistoryMapEntry* map_entry =
|
| map_item->add_word_id_history_map_entry();
|
| map_entry->set_word_id(iter->first);
|
| @@ -1096,21 +1015,24 @@
|
| HistoryIDSet history_id_set;
|
| const RepeatedField<int64>& history_ids(iter->history_id());
|
| for (RepeatedField<int64>::const_iterator jiter = history_ids.begin();
|
| - jiter != history_ids.end(); ++jiter)
|
| + jiter != history_ids.end(); ++jiter) {
|
| history_id_set.insert(*jiter);
|
| - word_id_history_map_[word_id] = history_id_set;
|
| + private_data_->AddToHistoryIDWordMap(*jiter, word_id);
|
| + }
|
| + private_data_->word_id_history_map_[word_id] = history_id_set;
|
| }
|
| return true;
|
| }
|
|
|
| void InMemoryURLIndex::SaveHistoryInfoMap(
|
| InMemoryURLIndexCacheItem* cache) const {
|
| - if (history_info_map_.empty())
|
| + if (private_data_->history_info_map_.empty())
|
| return;
|
| HistoryInfoMapItem* map_item = cache->mutable_history_info_map();
|
| - map_item->set_item_count(history_info_map_.size());
|
| - for (HistoryInfoMap::const_iterator iter = history_info_map_.begin();
|
| - iter != history_info_map_.end(); ++iter) {
|
| + map_item->set_item_count(private_data_->history_info_map_.size());
|
| + for (HistoryInfoMap::const_iterator iter =
|
| + private_data_->history_info_map_.begin();
|
| + iter != private_data_->history_info_map_.end(); ++iter) {
|
| HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry();
|
| map_entry->set_history_id(iter->first);
|
| const URLRow& url_row(iter->second);
|
| @@ -1148,7 +1070,7 @@
|
| string16 title(UTF8ToUTF16(iter->title()));
|
| url_row.set_title(title);
|
| }
|
| - history_info_map_[history_id] = url_row;
|
| + private_data_->history_info_map_[history_id] = url_row;
|
| }
|
| return true;
|
| }
|
|
|