Chromium Code Reviews| Index: chrome/browser/history/in_memory_url_index.cc |
| =================================================================== |
| --- chrome/browser/history/in_memory_url_index.cc (revision 106401) |
| +++ chrome/browser/history/in_memory_url_index.cc (working copy) |
| @@ -11,7 +11,6 @@ |
| #include <numeric> |
| #include "base/file_util.h" |
| -#include "base/i18n/break_iterator.h" |
| #include "base/i18n/case_conversion.h" |
| #include "base/metrics/histogram.h" |
| #include "base/string_util.h" |
| @@ -59,23 +58,6 @@ |
| // each of these scores for each factor. |
| const int kScoreRank[] = { 1425, 1200, 900, 400 }; |
| -ScoredHistoryMatch::ScoredHistoryMatch() |
| - : raw_score(0), |
| - can_inline(false) {} |
| - |
| -ScoredHistoryMatch::ScoredHistoryMatch(const URLRow& url_info) |
| - : HistoryMatch(url_info, 0, false, false), |
| - raw_score(0), |
| - can_inline(false) {} |
| - |
| -ScoredHistoryMatch::~ScoredHistoryMatch() {} |
| - |
| -// Comparison function for sorting ScoredMatches by their scores. |
| -bool ScoredHistoryMatch::MatchScoreGreater(const ScoredHistoryMatch& m1, |
| - const ScoredHistoryMatch& m2) { |
| - return m1.raw_score >= m2.raw_score; |
| -} |
| - |
| InMemoryURLIndex::SearchTermCacheItem::SearchTermCacheItem( |
| const WordIDSet& word_id_set, |
| const HistoryIDSet& history_id_set) |
| @@ -88,11 +70,6 @@ |
| InMemoryURLIndex::SearchTermCacheItem::~SearchTermCacheItem() {} |
| -// Comparison function for sorting TermMatches by their offsets. |
| -bool MatchOffsetLess(const TermMatch& m1, const TermMatch& m2) { |
| - return m1.offset < m2.offset; |
| -} |
| - |
| // Comparison function for sorting search terms by descending length. |
| bool LengthGreater(const string16& string_a, const string16& string_b) { |
| return string_a.length() > string_b.length(); |
| @@ -137,14 +114,14 @@ |
| InMemoryURLIndex::InMemoryURLIndex(const FilePath& history_dir) |
| : history_dir_(history_dir), |
| - history_item_count_(0), |
| + private_data_(new URLIndexPrivateData), |
| cached_at_shutdown_(false) { |
| InMemoryURLIndex::InitializeSchemeWhitelist(&scheme_whitelist_); |
| } |
| // Called only by unit tests. |
| InMemoryURLIndex::InMemoryURLIndex() |
| - : history_item_count_(0), |
| + : private_data_(new URLIndexPrivateData), |
| cached_at_shutdown_(false) { |
| InMemoryURLIndex::InitializeSchemeWhitelist(&scheme_whitelist_); |
| } |
| @@ -170,7 +147,7 @@ |
| // Indexing |
| -bool InMemoryURLIndex::Init(history::URLDatabase* history_db, |
| +bool InMemoryURLIndex::Init(URLDatabase* history_db, |
| const std::string& languages) { |
| // TODO(mrossetti): Register for profile/language change notifications. |
| languages_ = languages; |
| @@ -190,26 +167,40 @@ |
| if (!InMemoryURLIndex::URLSchemeIsWhitelisted(gurl)) |
| return true; |
| + URLID row_id = row.id(); |
| + // Strip out username and password before saving and indexing. |
| string16 url(net::FormatUrl(gurl, languages_, |
| net::kFormatUrlOmitUsernamePassword, |
| UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS, |
| NULL, NULL, NULL)); |
| - HistoryID history_id = static_cast<HistoryID>(row.id()); |
| - DCHECK_LT(row.id(), std::numeric_limits<HistoryID>::max()); |
| + HistoryID history_id = static_cast<HistoryID>(row_id); |
| + DCHECK_LT(history_id, std::numeric_limits<HistoryID>::max()); |
| // Add the row for quick lookup in the history info store. |
| - URLRow new_row(GURL(url), row.id()); |
| + URLRow new_row(GURL(url), row_id); |
| new_row.set_visit_count(row.visit_count()); |
| new_row.set_typed_count(row.typed_count()); |
| new_row.set_last_visit(row.last_visit()); |
| new_row.set_title(row.title()); |
| - history_info_map_[history_id] = new_row; |
| + private_data_->history_info_map_[history_id] = new_row; |
| + // Index the words contained in the URL and title of the row. |
| + AddRowWordsToIndex(new_row); |
| + return true; |
|
Peter Kasting
2011/10/24 22:01:40
Nit: I just noticed that this function (old and ne
mrossetti
2011/10/25 16:15:05
Done.
|
| +} |
| + |
| +void InMemoryURLIndex::AddRowWordsToIndex(const URLRow& row) { |
| + HistoryID history_id = static_cast<HistoryID>(row.id()); |
| // Split URL into individual, unique words then add in the title words. |
| + const GURL& gurl(row.url()); |
| + string16 url(net::FormatUrl(gurl, languages_, |
| + net::kFormatUrlOmitUsernamePassword, |
| + UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS, |
| + NULL, NULL, NULL)); |
| url = base::i18n::ToLower(url); |
| - String16Set url_words = WordSetFromString16(url); |
| - String16Set title_words = WordSetFromString16(row.title()); |
| + String16Set url_words = String16SetFromString16(url); |
| + String16Set title_words = String16SetFromString16(row.title()); |
| String16Set words; |
| std::set_union(url_words.begin(), url_words.end(), |
| title_words.begin(), title_words.end(), |
| @@ -218,10 +209,50 @@ |
| word_iter != words.end(); ++word_iter) |
| AddWordToIndex(*word_iter, history_id); |
| - ++history_item_count_; |
| - return true; |
| + search_term_cache_.clear(); // Invalidate the term cache. |
| } |
| +void InMemoryURLIndex::RemoveRowFromIndex(const URLRow& row) { |
| + RemoveRowWordsFromIndex(row); |
| + HistoryID history_id = static_cast<HistoryID>(row.id()); |
| + private_data_->history_info_map_.erase(history_id); |
| +} |
| + |
| +void InMemoryURLIndex::RemoveRowWordsFromIndex(const URLRow& row) { |
| + // Remove the entries in history_id_word_map_ and word_id_history_map_ for |
| + // this row. |
| + URLIndexPrivateData& private_data(*(private_data_.get())); |
| + HistoryID history_id = static_cast<HistoryID>(row.id()); |
| + WordIDSet word_id_set = private_data.history_id_word_map_[history_id]; |
| + private_data.history_id_word_map_.erase(history_id); |
| + |
| + // Reconcile any changes to word usage. |
| + for (WordIDSet::iterator word_id_iter = word_id_set.begin(); |
| + word_id_iter != word_id_set.end(); ++word_id_iter) { |
| + WordID word_id = *word_id_iter; |
| + private_data.word_id_history_map_[word_id].erase(history_id); |
| + if (!private_data.word_id_history_map_[word_id].empty()) |
| + continue; // The word is still in use. |
| + |
| + // The word is no longer in use. Reconcile any changes to character usage. |
| + string16 word = private_data.word_list_[word_id]; |
| + Char16Set characters = Char16SetFromString16(word); |
| + for (Char16Set::iterator uni_char_iter = characters.begin(); |
| + uni_char_iter != characters.end(); ++uni_char_iter) { |
| + char16 uni_char = *uni_char_iter; |
| + private_data.char_word_map_[uni_char].erase(word_id); |
| + if (private_data.char_word_map_[uni_char].empty()) |
| + private_data.char_word_map_.erase(uni_char); // No longer in use. |
| + } |
| + |
| + // Complete the removal of references to the word. |
| + private_data.word_id_history_map_.erase(word_id); |
| + private_data.word_map_.erase(word); |
| + private_data.word_list_[word_id] = string16(); |
| + private_data.available_words_.insert(word_id); |
| + } |
| +} |
| + |
| bool InMemoryURLIndex::ReloadFromHistory(history::URLDatabase* history_db, |
| bool clear_cache) { |
| ClearPrivateData(); |
| @@ -248,12 +279,7 @@ |
| } |
| void InMemoryURLIndex::ClearPrivateData() { |
| - history_item_count_ = 0; |
| - word_list_.clear(); |
| - word_map_.clear(); |
| - char_word_map_.clear(); |
| - word_id_history_map_.clear(); |
| - history_info_map_.clear(); |
| + private_data_->Clear(); |
| search_term_cache_.clear(); |
| } |
| @@ -289,10 +315,13 @@ |
| UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexRestoreCacheTime", |
| base::TimeTicks::Now() - beginning_time); |
| - UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems", history_item_count_); |
| + UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems", |
| + private_data_->history_id_word_map_.size()); |
| UMA_HISTOGRAM_COUNTS("History.InMemoryURLCacheSize", data.size()); |
| - UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", word_map_.size()); |
| - UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", char_word_map_.size()); |
| + UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", |
| + private_data_->word_map_.size()); |
| + UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", |
| + private_data_->char_word_map_.size()); |
| return true; |
| } |
| @@ -327,29 +356,39 @@ |
| // indexed and it qualifies then it gets indexed. If it is already |
| // indexed and still qualifies then it gets updated, otherwise it |
| // is deleted from the index. |
| - HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id); |
| - if (row_pos == history_info_map_.end()) { |
| + HistoryInfoMap::iterator row_pos = |
| + private_data_->history_info_map_.find(row_id); |
| + if (row_pos == private_data_->history_info_map_.end()) { |
| // This new row should be indexed if it qualifies. |
| - if (RowQualifiesAsSignificant(row, base::Time())) |
| - IndexRow(row); |
| + URLRow new_row(row); |
| + new_row.set_id(row_id); |
| + if (RowQualifiesAsSignificant(new_row, base::Time())) |
| + IndexRow(new_row); |
| } else if (RowQualifiesAsSignificant(row, base::Time())) { |
| // This indexed row still qualifies and will be re-indexed. |
| // The url won't have changed but the title, visit count, etc. |
| // might have changed. |
| - URLRow& old_row = row_pos->second; |
| - old_row.set_visit_count(row.visit_count()); |
| - old_row.set_typed_count(row.typed_count()); |
| - old_row.set_last_visit(row.last_visit()); |
| - // TODO(mrossetti): When we start indexing the title the next line |
| - // will need attention. |
| - old_row.set_title(row.title()); |
| + URLRow& updated_row = row_pos->second; |
| + updated_row.set_visit_count(row.visit_count()); |
| + updated_row.set_typed_count(row.typed_count()); |
| + updated_row.set_last_visit(row.last_visit()); |
| + // While the URL is guaranteed to remain stable, the title may have changed. |
| + // If so, then we need to update the index with the changed words. |
| + if (updated_row.title() != row.title()) { |
| + // Clear all words associated with this row and re-index both the |
| + // URL and title. |
| + RemoveRowWordsFromIndex(updated_row); |
| + updated_row.set_title(row.title()); |
| + AddRowWordsToIndex(updated_row); |
| + } |
| } else { |
| - // This indexed row no longer qualifies and will be de-indexed. |
| - history_info_map_.erase(row_id); |
| + // This indexed row no longer qualifies and will be de-indexed by |
| + // clearing all words associated with this row. |
| + URLRow& removed_row = row_pos->second; |
| + RemoveRowFromIndex(removed_row); |
| } |
| // This invalidates the cache. |
| search_term_cache_.clear(); |
| - // TODO(mrossetti): Record this transaction in the cache. |
| } |
| void InMemoryURLIndex::DeleteURL(URLID row_id) { |
| @@ -358,10 +397,9 @@ |
| // hits against this row until that map is rebuilt, but since the |
| // history_info_map_ no longer references the row no erroneous results |
| // will propagate to the user. |
| - history_info_map_.erase(row_id); |
| + private_data_->history_info_map_.erase(row_id); |
| // This invalidates the word cache. |
| search_term_cache_.clear(); |
| - // TODO(mrossetti): Record this transaction in the cache. |
| } |
| // Searching |
| @@ -369,6 +407,12 @@ |
| ScoredHistoryMatches InMemoryURLIndex::HistoryItemsForTerms( |
| const String16Vector& terms) { |
| ScoredHistoryMatches scored_items; |
| + |
| + // Do nothing if we have indexed no words (probably because we've not been |
| + // initialized yet). |
| + if (private_data_->word_list_.empty()) |
| + return scored_items; |
| + |
| if (!terms.empty()) { |
| // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep |
| // approach. |
| @@ -426,7 +470,7 @@ |
| iter->second.used_ = false; |
| } |
| -InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDSetFromWords( |
| +HistoryIDSet InMemoryURLIndex::HistoryIDSetFromWords( |
| const string16& uni_string) { |
| // Break the terms down into individual terms (words), get the candidate |
| // set for each term, and intersect each to get a final candidate list. |
| @@ -434,7 +478,7 @@ |
| // a string like "http://www.somewebsite.com" which, from our perspective, |
| // is four words: 'http', 'www', 'somewebsite', and 'com'. |
| HistoryIDSet history_id_set; |
| - String16Vector terms = WordVectorFromString16(uni_string, true); |
| + String16Vector terms = String16VectorFromString16(uni_string, true); |
| // Sort the terms into the longest first as such are likely to narrow down |
| // the results quicker. Also, single character terms are the most expensive |
| // to process so save them for last. |
| @@ -461,7 +505,7 @@ |
| return history_id_set; |
| } |
| -InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm( |
| +HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm( |
| const string16& term) { |
| if (term.empty()) |
| return HistoryIDSet(); |
| @@ -471,7 +515,7 @@ |
| // occuring words in the user's searches. |
| size_t term_length = term.length(); |
| - InMemoryURLIndex::WordIDSet word_id_set; |
| + WordIDSet word_id_set; |
| if (term_length > 1) { |
| // See if this term or a prefix thereof is present in the cache. |
| SearchTermCacheMap::iterator best_prefix(search_term_cache_.end()); |
| @@ -517,7 +561,8 @@ |
| // Reduce the word set with any leftover, unprocessed characters. |
| if (!unique_chars.empty()) { |
| - WordIDSet leftover_set(WordIDSetForTermChars(unique_chars)); |
| + WordIDSet leftover_set( |
| + private_data_->WordIDSetForTermChars(unique_chars)); |
| // We might come up empty on the leftovers. |
| if (leftover_set.empty()) { |
| search_term_cache_[term] = SearchTermCacheItem(); |
| @@ -540,13 +585,15 @@ |
| // contains words which do not have the search term as a proper subset. |
| for (WordIDSet::iterator word_set_iter = word_id_set.begin(); |
| word_set_iter != word_id_set.end(); ) { |
| - if (word_list_[*word_set_iter].find(term) == string16::npos) |
| + if (private_data_->word_list_[*word_set_iter].find(term) == |
| + string16::npos) |
| word_id_set.erase(word_set_iter++); |
| else |
| ++word_set_iter; |
| } |
| } else { |
| - word_id_set = WordIDSetForTermChars(Char16SetFromString16(term)); |
| + word_id_set = |
| + private_data_->WordIDSetForTermChars(Char16SetFromString16(term)); |
| } |
| // If any words resulted then we can compose a set of history IDs by unioning |
| @@ -556,8 +603,9 @@ |
| for (WordIDSet::iterator word_id_iter = word_id_set.begin(); |
| word_id_iter != word_id_set.end(); ++word_id_iter) { |
| WordID word_id = *word_id_iter; |
| - WordIDHistoryMap::iterator word_iter = word_id_history_map_.find(word_id); |
| - if (word_iter != word_id_history_map_.end()) { |
| + WordIDHistoryMap::iterator word_iter = |
| + private_data_->word_id_history_map_.find(word_id); |
| + if (word_iter != private_data_->word_id_history_map_.end()) { |
| HistoryIDSet& word_history_id_set(word_iter->second); |
| history_id_set.insert(word_history_id_set.begin(), |
| word_history_id_set.end()); |
| @@ -575,85 +623,53 @@ |
| // Utility Functions |
| -// static |
| -InMemoryURLIndex::String16Set InMemoryURLIndex::WordSetFromString16( |
| - const string16& uni_string) { |
| - const size_t kMaxWordLength = 64; |
| - String16Vector words = WordVectorFromString16(uni_string, false); |
| - String16Set word_set; |
| - for (String16Vector::const_iterator iter = words.begin(); iter != words.end(); |
| - ++iter) |
| - word_set.insert(base::i18n::ToLower(*iter).substr(0, kMaxWordLength)); |
| - return word_set; |
| -} |
| - |
| -// static |
| -InMemoryURLIndex::String16Vector InMemoryURLIndex::WordVectorFromString16( |
| - const string16& uni_string, |
| - bool break_on_space) { |
| - base::i18n::BreakIterator iter( |
| - uni_string, |
| - break_on_space ? base::i18n::BreakIterator::BREAK_SPACE |
| - : base::i18n::BreakIterator::BREAK_WORD); |
| - String16Vector words; |
| - if (!iter.Init()) |
| - return words; |
| - while (iter.Advance()) { |
| - if (break_on_space || iter.IsWord()) { |
| - string16 word = iter.GetString(); |
| - if (break_on_space) |
| - TrimWhitespace(word, TRIM_ALL, &word); |
| - if (!word.empty()) |
| - words.push_back(word); |
| - } |
| - } |
| - return words; |
| -} |
| - |
| -// static |
| -InMemoryURLIndex::Char16Set InMemoryURLIndex::Char16SetFromString16( |
| - const string16& term) { |
| - Char16Set characters; |
| - for (string16::const_iterator iter = term.begin(); iter != term.end(); |
| - ++iter) |
| - characters.insert(*iter); |
| - return characters; |
| -} |
| - |
| void InMemoryURLIndex::AddWordToIndex(const string16& term, |
| HistoryID history_id) { |
| - WordMap::iterator word_pos = word_map_.find(term); |
| - if (word_pos != word_map_.end()) |
| + WordMap::iterator word_pos = private_data_->word_map_.find(term); |
| + if (word_pos != private_data_->word_map_.end()) |
| UpdateWordHistory(word_pos->second, history_id); |
| else |
| AddWordHistory(term, history_id); |
| } |
| void InMemoryURLIndex::UpdateWordHistory(WordID word_id, HistoryID history_id) { |
| - WordIDHistoryMap::iterator history_pos = word_id_history_map_.find(word_id); |
| - DCHECK(history_pos != word_id_history_map_.end()); |
| - HistoryIDSet& history_id_set(history_pos->second); |
| - history_id_set.insert(history_id); |
| + WordIDHistoryMap::iterator history_pos = |
| + private_data_->word_id_history_map_.find(word_id); |
| + DCHECK(history_pos != private_data_->word_id_history_map_.end()); |
| + HistoryIDSet& history_id_set(history_pos->second); |
| + history_id_set.insert(history_id); |
| + private_data_->AddToHistoryIDWordMap(history_id, word_id); |
| } |
| // Add a new word to the word list and the word map, and then create a |
| // new entry in the word/history map. |
| void InMemoryURLIndex::AddWordHistory(const string16& term, |
| HistoryID history_id) { |
| - word_list_.push_back(term); |
| - WordID word_id = word_list_.size() - 1; |
| - word_map_[term] = word_id; |
| + URLIndexPrivateData& private_data(*(private_data_.get())); |
| + WordID word_id = private_data.word_list_.size(); |
| + if (private_data.available_words_.empty()) { |
| + private_data.word_list_.push_back(term); |
| + } else { |
| + word_id = *(private_data.available_words_.begin()); |
| + private_data.word_list_[word_id] = term; |
| + private_data.available_words_.erase(word_id); |
| + } |
| + private_data.word_map_[term] = word_id; |
| + |
| HistoryIDSet history_id_set; |
| history_id_set.insert(history_id); |
| - word_id_history_map_[word_id] = history_id_set; |
| + private_data.word_id_history_map_[word_id] = history_id_set; |
| + private_data.AddToHistoryIDWordMap(history_id, word_id); |
| + |
| // For each character in the newly added word (i.e. a word that is not |
| // already in the word index), add the word to the character index. |
| Char16Set characters = Char16SetFromString16(term); |
| for (Char16Set::iterator uni_char_iter = characters.begin(); |
| uni_char_iter != characters.end(); ++uni_char_iter) { |
| char16 uni_char = *uni_char_iter; |
| - CharWordIDMap::iterator char_iter = char_word_map_.find(uni_char); |
| - if (char_iter != char_word_map_.end()) { |
| + CharWordIDMap::iterator char_iter = |
| + private_data.char_word_map_.find(uni_char); |
| + if (char_iter != private_data.char_word_map_.end()) { |
| // Update existing entry in the char/word index. |
| WordIDSet& word_id_set(char_iter->second); |
| word_id_set.insert(word_id); |
| @@ -661,108 +677,13 @@ |
| // Create a new entry in the char/word index. |
| WordIDSet word_id_set; |
| word_id_set.insert(word_id); |
| - char_word_map_[uni_char] = word_id_set; |
| + private_data.char_word_map_[uni_char] = word_id_set; |
| } |
| } |
| } |
| -InMemoryURLIndex::WordIDSet InMemoryURLIndex::WordIDSetForTermChars( |
| - const Char16Set& term_chars) { |
| - WordIDSet word_id_set; |
| - for (Char16Set::const_iterator c_iter = term_chars.begin(); |
| - c_iter != term_chars.end(); ++c_iter) { |
| - CharWordIDMap::iterator char_iter = char_word_map_.find(*c_iter); |
| - if (char_iter == char_word_map_.end()) { |
| - // A character was not found so there are no matching results: bail. |
| - word_id_set.clear(); |
| - break; |
| - } |
| - WordIDSet& char_word_id_set(char_iter->second); |
| - // It is possible for there to no longer be any words associated with |
| - // a particular character. Give up in that case. |
| - if (char_word_id_set.empty()) { |
| - word_id_set.clear(); |
| - break; |
| - } |
| - |
| - if (c_iter == term_chars.begin()) { |
| - // First character results becomes base set of results. |
| - word_id_set = char_word_id_set; |
| - } else { |
| - // Subsequent character results get intersected in. |
| - WordIDSet new_word_id_set; |
| - std::set_intersection(word_id_set.begin(), word_id_set.end(), |
| - char_word_id_set.begin(), char_word_id_set.end(), |
| - std::inserter(new_word_id_set, |
| - new_word_id_set.begin())); |
| - word_id_set.swap(new_word_id_set); |
| - } |
| - } |
| - return word_id_set; |
| -} |
| - |
| // static |
| -TermMatches InMemoryURLIndex::MatchTermInString(const string16& term, |
| - const string16& string, |
| - int term_num) { |
| - const size_t kMaxCompareLength = 2048; |
| - const string16& short_string = (string.length() > kMaxCompareLength) ? |
| - string.substr(0, kMaxCompareLength) : string; |
| - TermMatches matches; |
| - for (size_t location = short_string.find(term); location != string16::npos; |
| - location = short_string.find(term, location + 1)) { |
| - matches.push_back(TermMatch(term_num, location, term.length())); |
| - } |
| - return matches; |
| -} |
| - |
| -// static |
| -TermMatches InMemoryURLIndex::SortAndDeoverlap(const TermMatches& matches) { |
| - if (matches.empty()) |
| - return matches; |
| - TermMatches sorted_matches = matches; |
| - std::sort(sorted_matches.begin(), sorted_matches.end(), MatchOffsetLess); |
| - TermMatches clean_matches; |
| - TermMatch last_match = sorted_matches[0]; |
| - clean_matches.push_back(last_match); |
| - for (TermMatches::const_iterator iter = sorted_matches.begin() + 1; |
| - iter != sorted_matches.end(); ++iter) { |
| - if (iter->offset >= last_match.offset + last_match.length) { |
| - last_match = *iter; |
| - clean_matches.push_back(last_match); |
| - } |
| - } |
| - return clean_matches; |
| -} |
| - |
| -// static |
| -std::vector<size_t> InMemoryURLIndex::OffsetsFromTermMatches( |
| - const TermMatches& matches) { |
| - std::vector<size_t> offsets; |
| - for (TermMatches::const_iterator i = matches.begin(); i != matches.end(); ++i) |
| - offsets.push_back(i->offset); |
| - return offsets; |
| -} |
| - |
| -// static |
| -TermMatches InMemoryURLIndex::ReplaceOffsetsInTermMatches( |
| - const TermMatches& matches, |
| - const std::vector<size_t>& offsets) { |
| - DCHECK_EQ(matches.size(), offsets.size()); |
| - TermMatches new_matches; |
| - std::vector<size_t>::const_iterator offset_iter = offsets.begin(); |
| - for (TermMatches::const_iterator term_iter = matches.begin(); |
| - term_iter != matches.end(); ++term_iter, ++offset_iter) { |
| - if (*offset_iter != string16::npos) { |
| - TermMatch new_match(*term_iter); |
| - new_match.offset = *offset_iter; |
| - new_matches.push_back(new_match); |
| - } |
| - } |
| - return new_matches; |
| -} |
| - |
| -// static |
| +// TODO(mrossetti): This can be made a ctor for ScoredHistoryMatch. |
| ScoredHistoryMatch InMemoryURLIndex::ScoredMatchForURL( |
| const URLRow& row, |
| const String16Vector& terms) { |
| @@ -791,8 +712,8 @@ |
| } |
| // Sort matches by offset and eliminate any which overlap. |
| - match.url_matches = SortAndDeoverlap(match.url_matches); |
| - match.title_matches = SortAndDeoverlap(match.title_matches); |
| + match.url_matches = SortAndDeoverlapMatches(match.url_matches); |
| + match.title_matches = SortAndDeoverlapMatches(match.title_matches); |
| // We should not (currently) inline autocomplete a result unless both of the |
| // following are true: |
| @@ -906,19 +827,17 @@ |
| const InMemoryURLIndex& index, |
| const String16Vector& lower_terms) |
| : index_(index), |
| - lower_terms_(lower_terms) { |
| -} |
| + lower_terms_(lower_terms) {} |
| InMemoryURLIndex::AddHistoryMatch::~AddHistoryMatch() {} |
| -void InMemoryURLIndex::AddHistoryMatch::operator()( |
| - const InMemoryURLIndex::HistoryID history_id) { |
| +void InMemoryURLIndex::AddHistoryMatch::operator()(const HistoryID history_id) { |
| HistoryInfoMap::const_iterator hist_pos = |
| - index_.history_info_map_.find(history_id); |
| + index_.private_data_->history_info_map_.find(history_id); |
| // Note that a history_id may be present in the word_id_history_map_ yet not |
| // be found in the history_info_map_. This occurs when an item has been |
| // deleted by the user or the item no longer qualifies as a quick result. |
| - if (hist_pos != index_.history_info_map_.end()) { |
| + if (hist_pos != index_.private_data_->history_info_map_.end()) { |
| const URLRow& hist_item = hist_pos->second; |
| ScoredHistoryMatch match(ScoredMatchForURL(hist_item, lower_terms_)); |
| if (match.raw_score > 0) |
| @@ -940,7 +859,9 @@ |
| void InMemoryURLIndex::SavePrivateData(InMemoryURLIndexCacheItem* cache) const { |
| DCHECK(cache); |
| cache->set_timestamp(base::Time::Now().ToInternalValue()); |
| - cache->set_history_item_count(history_item_count_); |
| + // history_item_count_ is no longer used but rather than change the protobuf |
| + // definition use a placeholder. This will go away with the switch to SQLite. |
| + cache->set_history_item_count(0); |
| SaveWordList(cache); |
| SaveWordMap(cache); |
| SaveCharWordMap(cache); |
| @@ -951,20 +872,18 @@ |
| bool InMemoryURLIndex::RestorePrivateData( |
| const InMemoryURLIndexCacheItem& cache) { |
| last_saved_ = base::Time::FromInternalValue(cache.timestamp()); |
| - history_item_count_ = cache.history_item_count(); |
| - return (history_item_count_ == 0) || (RestoreWordList(cache) && |
| - RestoreWordMap(cache) && RestoreCharWordMap(cache) && |
| - RestoreWordIDHistoryMap(cache) && RestoreHistoryInfoMap(cache)); |
| + return RestoreWordList(cache) && RestoreWordMap(cache) && |
| + RestoreCharWordMap(cache) && RestoreWordIDHistoryMap(cache) && |
| + RestoreHistoryInfoMap(cache); |
| } |
| - |
| void InMemoryURLIndex::SaveWordList(InMemoryURLIndexCacheItem* cache) const { |
| - if (word_list_.empty()) |
| + if (private_data_->word_list_.empty()) |
| return; |
| WordListItem* list_item = cache->mutable_word_list(); |
| - list_item->set_word_count(word_list_.size()); |
| - for (String16Vector::const_iterator iter = word_list_.begin(); |
| - iter != word_list_.end(); ++iter) |
| + list_item->set_word_count(private_data_->word_list_.size()); |
| + for (String16Vector::const_iterator iter = private_data_->word_list_.begin(); |
| + iter != private_data_->word_list_.end(); ++iter) |
| list_item->add_word(UTF16ToUTF8(*iter)); |
| } |
| @@ -979,17 +898,17 @@ |
| const RepeatedPtrField<std::string>& words(list_item.word()); |
| for (RepeatedPtrField<std::string>::const_iterator iter = words.begin(); |
| iter != words.end(); ++iter) |
| - word_list_.push_back(UTF8ToUTF16(*iter)); |
| + private_data_->word_list_.push_back(UTF8ToUTF16(*iter)); |
| return true; |
| } |
| void InMemoryURLIndex::SaveWordMap(InMemoryURLIndexCacheItem* cache) const { |
| - if (word_map_.empty()) |
| + if (private_data_->word_map_.empty()) |
| return; |
| WordMapItem* map_item = cache->mutable_word_map(); |
| - map_item->set_item_count(word_map_.size()); |
| - for (WordMap::const_iterator iter = word_map_.begin(); |
| - iter != word_map_.end(); ++iter) { |
| + map_item->set_item_count(private_data_->word_map_.size()); |
| + for (WordMap::const_iterator iter = private_data_->word_map_.begin(); |
| + iter != private_data_->word_map_.end(); ++iter) { |
| WordMapEntry* map_entry = map_item->add_word_map_entry(); |
| map_entry->set_word(UTF16ToUTF8(iter->first)); |
| map_entry->set_word_id(iter->second); |
| @@ -1007,17 +926,18 @@ |
| const RepeatedPtrField<WordMapEntry>& entries(list_item.word_map_entry()); |
| for (RepeatedPtrField<WordMapEntry>::const_iterator iter = entries.begin(); |
| iter != entries.end(); ++iter) |
| - word_map_[UTF8ToUTF16(iter->word())] = iter->word_id(); |
| + private_data_->word_map_[UTF8ToUTF16(iter->word())] = iter->word_id(); |
| return true; |
| } |
| void InMemoryURLIndex::SaveCharWordMap(InMemoryURLIndexCacheItem* cache) const { |
| - if (char_word_map_.empty()) |
| + if (private_data_->char_word_map_.empty()) |
| return; |
| CharWordMapItem* map_item = cache->mutable_char_word_map(); |
| - map_item->set_item_count(char_word_map_.size()); |
| - for (CharWordIDMap::const_iterator iter = char_word_map_.begin(); |
| - iter != char_word_map_.end(); ++iter) { |
| + map_item->set_item_count(private_data_->char_word_map_.size()); |
| + for (CharWordIDMap::const_iterator iter = |
| + private_data_->char_word_map_.begin(); |
| + iter != private_data_->char_word_map_.end(); ++iter) { |
| CharWordMapEntry* map_entry = map_item->add_char_word_map_entry(); |
| map_entry->set_char_16(iter->first); |
| const WordIDSet& word_id_set(iter->second); |
| @@ -1051,19 +971,20 @@ |
| for (RepeatedField<int32>::const_iterator jiter = word_ids.begin(); |
| jiter != word_ids.end(); ++jiter) |
| word_id_set.insert(*jiter); |
| - char_word_map_[uni_char] = word_id_set; |
| + private_data_->char_word_map_[uni_char] = word_id_set; |
| } |
| return true; |
| } |
| void InMemoryURLIndex::SaveWordIDHistoryMap(InMemoryURLIndexCacheItem* cache) |
| const { |
| - if (word_id_history_map_.empty()) |
| + if (private_data_->word_id_history_map_.empty()) |
| return; |
| WordIDHistoryMapItem* map_item = cache->mutable_word_id_history_map(); |
| - map_item->set_item_count(word_id_history_map_.size()); |
| - for (WordIDHistoryMap::const_iterator iter = word_id_history_map_.begin(); |
| - iter != word_id_history_map_.end(); ++iter) { |
| + map_item->set_item_count(private_data_->word_id_history_map_.size()); |
| + for (WordIDHistoryMap::const_iterator iter = |
| + private_data_->word_id_history_map_.begin(); |
| + iter != private_data_->word_id_history_map_.end(); ++iter) { |
| WordIDHistoryMapEntry* map_entry = |
| map_item->add_word_id_history_map_entry(); |
| map_entry->set_word_id(iter->first); |
| @@ -1096,21 +1017,24 @@ |
| HistoryIDSet history_id_set; |
| const RepeatedField<int64>& history_ids(iter->history_id()); |
| for (RepeatedField<int64>::const_iterator jiter = history_ids.begin(); |
| - jiter != history_ids.end(); ++jiter) |
| + jiter != history_ids.end(); ++jiter) { |
| history_id_set.insert(*jiter); |
| - word_id_history_map_[word_id] = history_id_set; |
| + private_data_->AddToHistoryIDWordMap(*jiter, word_id); |
| + } |
| + private_data_->word_id_history_map_[word_id] = history_id_set; |
| } |
| return true; |
| } |
| void InMemoryURLIndex::SaveHistoryInfoMap( |
| InMemoryURLIndexCacheItem* cache) const { |
| - if (history_info_map_.empty()) |
| + if (private_data_->history_info_map_.empty()) |
| return; |
| HistoryInfoMapItem* map_item = cache->mutable_history_info_map(); |
| - map_item->set_item_count(history_info_map_.size()); |
| - for (HistoryInfoMap::const_iterator iter = history_info_map_.begin(); |
| - iter != history_info_map_.end(); ++iter) { |
| + map_item->set_item_count(private_data_->history_info_map_.size()); |
| + for (HistoryInfoMap::const_iterator iter = |
| + private_data_->history_info_map_.begin(); |
| + iter != private_data_->history_info_map_.end(); ++iter) { |
| HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry(); |
| map_entry->set_history_id(iter->first); |
| const URLRow& url_row(iter->second); |
| @@ -1148,7 +1072,7 @@ |
| string16 title(UTF8ToUTF16(iter->title())); |
| url_row.set_title(title); |
| } |
| - history_info_map_[history_id] = url_row; |
| + private_data_->history_info_map_[history_id] = url_row; |
| } |
| return true; |
| } |